From 69e55430e9eb2f6df76f2709a744b388d69d567b Mon Sep 17 00:00:00 2001 From: Zhang Wensheng Date: Wed, 9 Mar 2022 16:43:21 +0800 Subject: [PATCH 001/340] block: add a switch for precise iostat accounting hulk inclusion category: bugfix bugzilla: 39265, https://gitee.com/openeuler/kernel/issues/I4WC06 CVE: NA ----------------------------------------------- When the inflight IOs are slow and no new IOs are issued, we expect iostat could manifest the IO hang problem. However after commit 9c6dea45e6f7 ("block: delete part_round_stats and switch to less precise counting"), io_tick and time_in_queue will not be updated until the end of IO, and the avgqu-sz and %util columns of iostat will be zero. To fix it, we could fallback to the implementation before commit 9c6dea45e6f7, but it may cause performance regression on NVMe device or bio-based device (due to overhead of inflight calculation), so add a switch to control whether or not to use precise iostat accounting. It can be enabled by adding "precise_iostat=1" in kernel boot cmdline. When precise accouting is enabled, io_tick and time_in_queue will be updated when accessing /proc/diskstats and /sys/block/sdX/sdXN/stat. Fixes: 9c6dea45e6f7 ("block: delete part_round_stats and switch to less precise counting") Signed-off-by: Zhang Wensheng Reviewed-by: Jason Yan Signed-off-by: Yang Yingliang --- block/bio.c | 8 ++++++-- block/blk-core.c | 30 +++++++++++++++++++++++++++--- block/blk-merge.c | 2 ++ block/genhd.c | 7 +++++++ block/partition-generic.c | 8 ++++++++ include/linux/blkdev.h | 1 + 6 files changed, 51 insertions(+), 5 deletions(-) diff --git a/block/bio.c b/block/bio.c index d94243411ef3..b50d3b59c79b 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1706,9 +1706,13 @@ void generic_end_io_acct(struct request_queue *q, int req_op, const int sgrp = op_stat_group(req_op); int cpu = part_stat_lock(); - update_io_ticks(cpu, part, now); + if (precise_iostat) { + part_round_stats(q, cpu, part); + } else { + update_io_ticks(cpu, part, now); + part_stat_add(cpu, part, time_in_queue, duration); + } part_stat_add(cpu, part, nsecs[sgrp], jiffies_to_nsecs(duration)); - part_stat_add(cpu, part, time_in_queue, duration); part_dec_in_flight(q, part, op_is_write(req_op)); part_stat_unlock(); diff --git a/block/blk-core.c b/block/blk-core.c index 41d0b09e9a67..df733e8caa6a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -56,6 +56,20 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); DEFINE_IDA(blk_queue_ida); +bool precise_iostat; +static int __init precise_iostat_setup(char *str) +{ + bool precise; + + if (!strtobool(str, &precise)) { + precise_iostat = precise; + pr_info("precise iostat %d\n", precise_iostat); + } + + return 1; +} +__setup("precise_iostat=", precise_iostat_setup); + /* * For the allocated request tables */ @@ -1700,8 +1714,13 @@ static void part_round_stats_single(struct request_queue *q, int cpu, struct hd_struct *part, unsigned long now, unsigned int inflight) { - if (inflight) + if (inflight) { + if (precise_iostat) { + __part_stat_add(cpu, part, time_in_queue, + inflight * (now - part->stamp)); + } __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); + } part->stamp = now; } @@ -2771,10 +2790,15 @@ void blk_account_io_done(struct request *req, u64 now) cpu = part_stat_lock(); part = req->part; - update_io_ticks(cpu, part, jiffies); + if (!precise_iostat) { + update_io_ticks(cpu, part, jiffies); + part_stat_add(cpu, part, time_in_queue, + nsecs_to_jiffies64(now - req->start_time_ns)); + } else { + part_round_stats(req->q, cpu, 
part); + } part_stat_inc(cpu, part, ios[sgrp]); part_stat_add(cpu, part, nsecs[sgrp], now - req->start_time_ns); - part_stat_add(cpu, part, time_in_queue, nsecs_to_jiffies64(now - req->start_time_ns)); part_dec_in_flight(req->q, part, rq_data_dir(req)); hd_struct_put(part); diff --git a/block/blk-merge.c b/block/blk-merge.c index 4c17c1031e34..d2fabe1fdf32 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -669,6 +669,8 @@ static void blk_account_io_merge(struct request *req) cpu = part_stat_lock(); part = req->part; + if (precise_iostat) + part_round_stats(req->q, cpu, part); part_dec_in_flight(req->q, part, rq_data_dir(req)); hd_struct_put(part); diff --git a/block/genhd.c b/block/genhd.c index 183612cbbd6b..e7b97fdb4173 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1352,6 +1352,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) struct hd_struct *hd; char buf[BDEVNAME_SIZE]; unsigned int inflight[2]; + int cpu; /* if (&disk_to_dev(gp)->kobj.entry == block_class.devices.next) @@ -1363,6 +1364,12 @@ static int diskstats_show(struct seq_file *seqf, void *v) disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { + if (precise_iostat) { + cpu = part_stat_lock(); + part_round_stats(gp->queue, cpu, hd); + part_stat_unlock(); + } + part_in_flight(gp->queue, hd, inflight); seq_printf(seqf, "%4d %7d %s " "%lu %lu %lu %u " diff --git a/block/partition-generic.c b/block/partition-generic.c index 739c0cc5fd22..c4ac7a8c77dc 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "partitions/check.h" @@ -121,6 +122,13 @@ ssize_t part_stat_show(struct device *dev, struct hd_struct *p = dev_to_part(dev); struct request_queue *q = part_to_disk(p)->queue; unsigned int inflight[2]; + int cpu; + + if (precise_iostat) { + cpu = part_stat_lock(); + part_round_stats(q, cpu, p); + part_stat_unlock(); + } part_in_flight(q, p, inflight); return sprintf(buf, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 28ea02865ecc..a86659e78d98 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -28,6 +28,7 @@ #include #include +extern bool precise_iostat; struct module; struct scsi_ioctl_command; -- Gitee From 7165cb45a71e81414304ee7e15f12a5aa9e0ccdf Mon Sep 17 00:00:00 2001 From: Zhang Wensheng Date: Wed, 9 Mar 2022 16:43:22 +0800 Subject: [PATCH 002/340] block: account inflight from blk_account_io_start() if 'precise_iostat' is set hulk inclusion category: bugfix bugzilla: 39265, https://gitee.com/openeuler/kernel/issues/I4WC06 CVE: NA ----------------------------------------------- When 'precise_iostat' is set, io_tick and time_in_queue will be updated if io is inflight. However, mq will not account io as inflight until getting driver tag, while sq will account it after getting sched tag. On the other hand, if io scheduler is none, blk_mq_get_tag() will get driver tag directly, and such io will be accounted as inflight. The consequences is that 'io_tick' will be miscalculated in part_round_stats(). Thus revert to sq's implementation if 'precise_iostat' is set. 
Signed-off-by: Zhang Wensheng Reviewed-by: Jason Yan Signed-off-by: Yang Yingliang --- block/genhd.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index e7b97fdb4173..fc24384e2c85 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -24,6 +24,7 @@ #include "blk.h" +extern bool precise_iostat; static DEFINE_MUTEX(block_class_lock); struct kobject *block_depr; @@ -47,7 +48,7 @@ static void disk_release_events(struct gendisk *disk); void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw) { - if (q->mq_ops) + if (!precise_iostat && q->mq_ops) return; atomic_inc(&part->in_flight[rw]); @@ -57,7 +58,7 @@ void part_inc_in_flight(struct request_queue *q, struct hd_struct *part, int rw) void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw) { - if (q->mq_ops) + if (!precise_iostat && q->mq_ops) return; atomic_dec(&part->in_flight[rw]); @@ -68,7 +69,7 @@ void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, int rw) void part_in_flight(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]) { - if (q->mq_ops) { + if (!precise_iostat && q->mq_ops) { blk_mq_in_flight(q, part, inflight); return; } @@ -85,7 +86,7 @@ void part_in_flight(struct request_queue *q, struct hd_struct *part, void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]) { - if (q->mq_ops) { + if (!precise_iostat && q->mq_ops) { blk_mq_in_flight_rw(q, part, inflight); return; } -- Gitee From 98aa2e7ecb2b2ea862aa8b327d2c57521f89b289 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Wed, 9 Mar 2022 16:43:23 +0800 Subject: [PATCH 003/340] dm multipath: fix missing blk_account_io_done() in error path hulk inclusion category: bugfix bugzilla: 186143, https://gitee.com/openeuler/kernel/issues/I4WC06 CVE: NA ----------------------------------------------- If precise_iostat is enabled, inflight will be recorded and cleared by atomic operations. However, for dm multipath, inflight will be recorded by dm_requeue_original_request(), and if some error happened, multipath_release_clone() won't clear inflight, which will cause inflight to leak. Furthermore, %util will always be 100 in iostat. Fix the problem by calling __blk_mq_end_request() in multipath_release_clone() instead. Signed-off-by: Yu Kuai Signed-off-by: Zhang Wensheng Reviewed-by: Jason Yan Signed-off-by: Yang Yingliang --- block/blk-core.c | 22 ++++++++++++++++++++-- block/blk.h | 9 +++++++++ drivers/md/dm-rq.c | 2 +- include/linux/blkdev.h | 2 ++ 4 files changed, 32 insertions(+), 3 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index df733e8caa6a..a5d80ab91170 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2669,8 +2669,10 @@ static blk_status_t blk_cloned_rq_check_limits(struct request_queue *q, * blk_insert_cloned_request - Helper for stacking drivers to submit a request * @q: the queue to submit the request * @rq: the request being queued + * @precise: true if io account with start and done will be balanced */ -blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq) +blk_status_t __blk_insert_cloned_request(struct request_queue *q, + struct request *rq, bool precise) { unsigned long flags; int where = ELEVATOR_INSERT_BACK; @@ -2693,7 +2695,16 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * * bypass a potential scheduler on the bottom device for * insert. 
*/ - return blk_mq_request_issue_directly(rq); + ret = blk_mq_request_issue_directly(rq); + if (ret && precise) { + u64 now = 0; + + if (blk_mq_need_time_stamp(rq)) + now = ktime_get_ns(); + + blk_account_io_done(rq, now); + } + return ret; } spin_lock_irqsave(q->queue_lock, flags); @@ -2718,6 +2729,13 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request * return BLK_STS_OK; } +EXPORT_SYMBOL_GPL(__blk_insert_cloned_request); + +blk_status_t blk_insert_cloned_request(struct request_queue *q, + struct request *rq) +{ + return __blk_insert_cloned_request(q, rq, false); +} EXPORT_SYMBOL_GPL(blk_insert_cloned_request); /** diff --git a/block/blk.h b/block/blk.h index e496e26630f7..dde2141a32dd 100644 --- a/block/blk.h +++ b/block/blk.h @@ -405,6 +405,15 @@ static inline struct io_context *create_io_context(gfp_t gfp_mask, int node) return current->io_context; } +/* + * Only need start/end time stamping if we have stats enabled, or using + * an IO scheduler. + */ +static inline bool blk_mq_need_time_stamp(struct request *rq) +{ + return (rq->rq_flags & RQF_IO_STAT) || rq->q->elevator; +} + /* * Internal throttling interface */ diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 80683f2b723d..3bd805f7ce85 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -410,7 +410,7 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ clone->rq_flags |= RQF_IO_STAT; clone->start_time_ns = ktime_get_ns(); - r = blk_insert_cloned_request(clone->q, clone); + r = __blk_insert_cloned_request(clone->q, clone, true); if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE) /* must complete clone in terms of original request */ dm_complete_request(rq, r); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a86659e78d98..1deaf36eb237 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1012,6 +1012,8 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); +extern blk_status_t __blk_insert_cloned_request(struct request_queue *q, + struct request *rq, bool precise); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_queue_split(struct request_queue *, struct bio **); -- Gitee From 9fefd49137cf42a0ba00fa98aa815ec209245840 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 11 Mar 2022 10:43:36 +0800 Subject: [PATCH 004/340] io_uring: re-issue block requests that failed because of resources mainline inclusion from mainline-v5.9-rc1 commit b63534c41e20b474483b4ddf47efc858c17352e0 category: bugfix bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D CVE: NA ------------------------------------------------- Mark the plug with nowait == true, which will cause requests to avoid blocking on request allocation. If they do, we catch them and reissue them from a task_work based handler. Normally we can catch -EAGAIN directly, but the hard case is for split requests. As an example, the application issues a 512KB request. The block core will split this into 128KB if that's the max size for the device. The first request issues just fine, but we run into -EAGAIN for some latter splits for the same request. 
As the bio is split, we don't get to see the -EAGAIN until one of the actual reads complete, and hence we cannot handle it inline as part of submission. This does potentially cause re-reads of parts of the range, as the whole request is reissued. There's currently no better way to handle this. Signed-off-by: Jens Axboe conflict: fs/io_uring.c Adding nowait to plug list is reverted in (62c774ed4831 ("io_uring: don't unconditionally set plug->nowait = true")) Signed-off-by: Laibin Qiu Reviewed-by: Zhang Yi --- fs/io_uring.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index f933d4f0edb4..aeedf191b813 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -917,6 +917,13 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, struct io_comp_state *cs); +static ssize_t io_import_iovec(int rw, struct io_kiocb *req, + struct iovec **iovec, struct iov_iter *iter, + bool needs_lock); +static int io_setup_async_rw(struct io_kiocb *req, ssize_t io_size, + struct iovec *iovec, struct iovec *fast_iov, + struct iov_iter *iter); + static struct kmem_cache *req_cachep; static const struct file_operations io_uring_fops; @@ -2363,10 +2370,90 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res, __io_req_complete(req, res, cflags, cs); } +#ifdef CONFIG_BLOCK +static bool io_resubmit_prep(struct io_kiocb *req, int error) +{ + struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; + ssize_t ret = -ECANCELED; + struct iov_iter iter; + int rw; + + if (error) { + ret = error; + goto end_req; + } + + switch (req->opcode) { + case IORING_OP_READV: + case IORING_OP_READ_FIXED: + case IORING_OP_READ: + rw = READ; + break; + case IORING_OP_WRITEV: + case IORING_OP_WRITE_FIXED: + case IORING_OP_WRITE: + rw = WRITE; + break; + default: + printk_once(KERN_WARNING "io_uring: bad opcode in resubmit %d\n", + req->opcode); + goto end_req; + } + + ret = io_import_iovec(rw, req, &iovec, &iter, false); + if (ret < 0) + goto end_req; + ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter); + if (!ret) + return true; + kfree(iovec); +end_req: + io_cqring_add_event(req, ret, 0); + req_set_fail_links(req); + io_put_req(req); + return false; +} + +static void io_rw_resubmit(struct callback_head *cb) +{ + struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work); + struct io_ring_ctx *ctx = req->ctx; + int err; + + __set_current_state(TASK_RUNNING); + + err = io_sq_thread_acquire_mm_files(ctx, req); + + if (io_resubmit_prep(req, err)) { + refcount_inc(&req->refs); + io_queue_async_work(req); + } +} +#endif + +static bool io_rw_reissue(struct io_kiocb *req, long res) +{ +#ifdef CONFIG_BLOCK + struct task_struct *tsk; + int ret; + + if ((res != -EAGAIN && res != -EOPNOTSUPP) || io_wq_current_is_worker()) + return false; + + tsk = req->task; + init_task_work(&req->task_work, io_rw_resubmit); + ret = task_work_add(tsk, &req->task_work, true); + if (!ret) + return true; +#endif + return false; +} + static void __io_complete_rw(struct io_kiocb *req, long res, long res2, struct io_comp_state *cs) { - io_complete_rw_common(&req->rw.kiocb, res, cs); + if (!io_rw_reissue(req, res)) + io_complete_rw_common(&req->rw.kiocb, res, cs); } static void io_complete_rw(struct kiocb *kiocb, long res, long res2) @@ -2536,6 +2623,9 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, if (kiocb->ki_flags & IOCB_NOWAIT) req->flags |= REQ_F_NOWAIT; + if (kiocb->ki_flags & 
IOCB_DIRECT) + io_get_req_task(req); + if (force_nonblock) kiocb->ki_flags |= IOCB_NOWAIT; @@ -3037,6 +3127,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, ret = rw_verify_area(READ, req->file, &kiocb->ki_pos, iov_count); if (!ret) { + unsigned long nr_segs = iter.nr_segs; ssize_t ret2; if (req->file->f_op->read_iter) @@ -3054,6 +3145,8 @@ static int io_read(struct io_kiocb *req, bool force_nonblock, goto copy_iov; kiocb_done(kiocb, ret2, cs); } else { + iter.count = iov_count; + iter.nr_segs = nr_segs; copy_iov: ret = io_setup_async_rw(req, io_size, iovec, inline_vecs, &iter); @@ -3120,6 +3213,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, ret = rw_verify_area(WRITE, req->file, &kiocb->ki_pos, iov_count); if (!ret) { + unsigned long nr_segs = iter.nr_segs; ssize_t ret2; /* @@ -3157,6 +3251,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock, goto copy_iov; kiocb_done(kiocb, ret2, cs); } else { + iter.count = iov_count; + iter.nr_segs = nr_segs; copy_iov: ret = io_setup_async_rw(req, io_size, iovec, inline_vecs, &iter); -- Gitee From 7448b91d9beaa37a768e6460c57938f92ade9706 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Fri, 11 Mar 2022 10:43:37 +0800 Subject: [PATCH 005/340] block: don't ignore REQ_NOWAIT for direct IO mainline inclusion from mainline-v5.12-rc6 commit f8b78caf21d5bc3fcfc40c18898f9d52ed1451a5 category: bugfix bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D CVE: NA ------------------------------------------------- If IOCB_NOWAIT is set on submission, then that needs to get propagated to REQ_NOWAIT on the block side. Otherwise we completely lose this information, and any issuer of IOCB_NOWAIT IO will potentially end up blocking on eg request allocation on the storage side. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe conflict: fs/block_dev.c Signed-off-by: Laibin Qiu Reviewed-by: Zhang Yi --- fs/block_dev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/block_dev.c b/fs/block_dev.c index 8b299347f2aa..9868b21b8ef9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -253,6 +253,9 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, task_io_account_write(ret); } + if (iocb->ki_flags & IOCB_NOWAIT) + bio.bi_opf |= REQ_NOWAIT; + qc = submit_bio(&bio); for (;;) { set_current_state(TASK_UNINTERRUPTIBLE); @@ -407,6 +410,8 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages) bio->bi_opf = dio_bio_write_op(iocb); task_io_account_write(bio->bi_iter.bi_size); } + if (iocb->ki_flags & IOCB_NOWAIT) + bio->bi_opf |= REQ_NOWAIT; dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size; -- Gitee From 50fb507af014891e0fb2ecbeda7cb40cdcdaede1 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 11 Mar 2022 10:43:38 +0800 Subject: [PATCH 006/340] io_uring: remove redundant initialization of variable ret mainline inclusion from mainline-v5.12-rc1 commit 4a245479c2312e6b51862c21af134d4191ab9cf7 category: bugfix bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D CVE: NA ------------------------------------------------- The variable ret is being initialized with a value that is never read and it is being updated later with a new value. The initialization is redundant and can be removed. 
Addresses-Coverity: ("Unused value") Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") Signed-off-by: Colin Ian King Reviewed-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe conflict: fs/io_uring.c Signed-off-by: Laibin Qiu Reviewed-by: Zhang Yi --- fs/io_uring.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index aeedf191b813..4e552dfe1c64 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2374,7 +2374,7 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res, static bool io_resubmit_prep(struct io_kiocb *req, int error) { struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; - ssize_t ret = -ECANCELED; + ssize_t ret; struct iov_iter iter; int rw; -- Gitee From 867ba1fd06f411f1ceaa33e978512803b48a722e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 11 Mar 2022 10:43:39 +0800 Subject: [PATCH 007/340] io_uring: don't double complete failed reissue request mainline inclusion from mainline-v5.10-rc5 commit c993df5a688975bf9ce899706ca13d2bc8d6be25 category: bugfix bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D CVE: NA ------------------------------------------------- Zorro reports that an xfstest test case is failing, and it turns out that for the reissue path we can potentially issue a double completion on the request for the failure path. There's an issue around the retry as well, but for now, at least just make sure that we handle the error path correctly. Cc: stable@vger.kernel.org Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") Reported-by: Zorro Lang Signed-off-by: Jens Axboe conflict: fs/io_uring.c Change based on e1e16097e265 ("io_uring: provide generic io_req_complete() helper") Signed-off-by: Laibin Qiu Reviewed-by: Zhang Yi --- fs/io_uring.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 4e552dfe1c64..bdcac452b174 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2408,9 +2408,7 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) return true; kfree(iovec); end_req: - io_cqring_add_event(req, ret, 0); req_set_fail_links(req); - io_put_req(req); return false; } -- Gitee From f7a31280eeb428b156d5d48604220f1ee84a1cb5 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 11 Mar 2022 10:43:40 +0800 Subject: [PATCH 008/340] io_uring: don't re-setup vecs/iter in io_resumit_prep() is already there mainline inclusion from mainline-v5.9-rc7 commit 8f3d749685e48c44dbe877ac9781079d85f914c8 category: bugfix bugzilla: 186136, https://gitee.com/openeuler/kernel/issues/I4RM1D CVE: NA ------------------------------------------------- If we already have mapped the necessary data for retry, then don't set it up again. It's a pointless operation, and we leak the iovec if it's a large (non-stack) vec. 
Fixes: b63534c41e20 ("io_uring: re-issue block requests that failed because of resources") Signed-off-by: Jens Axboe conflict: fs/io_uring.c Signed-off-by: Laibin Qiu Reviewed-by: Zhang Yi --- fs/io_uring.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index bdcac452b174..7240b5423170 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2400,13 +2400,17 @@ static bool io_resubmit_prep(struct io_kiocb *req, int error) goto end_req; } - ret = io_import_iovec(rw, req, &iovec, &iter, false); - if (ret < 0) - goto end_req; - ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter); - if (!ret) + if (!req->io) { + ret = io_import_iovec(rw, req, &iovec, &iter, false); + if (ret < 0) + goto end_req; + ret = io_setup_async_rw(req, ret, iovec, inline_vecs, &iter); + if (!ret) + return true; + kfree(iovec); + } else { return true; - kfree(iovec); + } end_req: req_set_fail_links(req); return false; -- Gitee From 2154471020979d12998292050eddf0e066b2a57d Mon Sep 17 00:00:00 2001 From: Zhang Qiao Date: Fri, 11 Mar 2022 11:30:09 +0800 Subject: [PATCH 009/340] sched: Fix sleeping in atomic context at cpu_qos_write() hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4WOPM CVE: NA -------------------------------- cfs_bandwidth_usage_inc() need hold jump_label_mutex and might sleep, so we can not call it in atomic context. Fix this by moving cfs_bandwidth_usage_{inc,dec}() out of rcu read critical section. Fixes: f7b390cd929 ("sched: Change cgroup task scheduler policy") Signed-off-by: Zhang Qiao Reviewed-by: Chen Hui Reviewed-by: Wang Weiyang Signed-off-by: Yang Yingliang Signed-off-by: Laibin Qiu --- kernel/sched/core.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/kernel/sched/core.c b/kernel/sched/core.c index b36a3b4c60e9..496ce71f93a7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6981,13 +6981,10 @@ static int tg_change_scheduler(struct task_group *tg, void *data) struct cgroup_subsys_state *css = &tg->css; tg->qos_level = qos_level; - if (qos_level == -1) { + if (qos_level == -1) policy = SCHED_IDLE; - cfs_bandwidth_usage_inc(); - } else { + else policy = SCHED_NORMAL; - cfs_bandwidth_usage_dec(); - } param.sched_priority = 0; css_task_iter_start(css, 0, &it); @@ -7015,6 +7012,13 @@ static int cpu_qos_write(struct cgroup_subsys_state *css, if (tg->qos_level == -1 && qos_level == 0) return -EINVAL; + cpus_read_lock(); + if (qos_level == -1) + cfs_bandwidth_usage_inc(); + else + cfs_bandwidth_usage_dec(); + cpus_read_unlock(); + rcu_read_lock(); walk_tg_tree_from(tg, tg_change_scheduler, tg_nop, (void *)(&qos_level)); rcu_read_unlock(); -- Gitee From 5b4e1d322beedfe8abfbc60ccf85a6bf46c1cde9 Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Fri, 11 Mar 2022 16:31:11 +0800 Subject: [PATCH 010/340] arm64: acpi: fix UBSAN warning mainline inclusion from mainline-v5.8-rc1 commit a194c33f45f83068ef13bf1d16e26d4ca3ecc098 category: bugfix bugzilla: 89150 CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=a194c33f45f83068ef13bf1d16e26d4ca3ecc098 -------------------------------- Will reported a UBSAN warning: UBSAN: null-ptr-deref in arch/arm64/kernel/smp.c:596:6 member access within null pointer of type 'struct acpi_madt_generic_interrupt' CPU: 0 PID: 0 Comm: swapper Not tainted 5.7.0-rc6-00124-g96bc42ff0a82 #1 Call trace: dump_backtrace+0x0/0x384 show_stack+0x28/0x38 dump_stack+0xec/0x174 
handle_null_ptr_deref+0x134/0x174 __ubsan_handle_type_mismatch_v1+0x84/0xa4 acpi_parse_gic_cpu_interface+0x60/0xe8 acpi_parse_entries_array+0x288/0x498 acpi_table_parse_entries_array+0x178/0x1b4 acpi_table_parse_madt+0xa4/0x110 acpi_parse_and_init_cpus+0x38/0x100 smp_init_cpus+0x74/0x258 setup_arch+0x350/0x3ec start_kernel+0x98/0x6f4 This is from the use of the ACPI_OFFSET in arch/arm64/include/asm/acpi.h. Replace its use with offsetof from include/linux/stddef.h which should implement the same logic using __builtin_offsetof, so that UBSAN wont warn. Reported-by: Will Deacon Suggested-by: Ard Biesheuvel Signed-off-by: Nick Desaulniers Reviewed-by: Jeremy Linton Acked-by: Lorenzo Pieralisi Cc: stable@vger.kernel.org Link: https://lore.kernel.org/lkml/20200521100952.GA5360@willie-the-truck/ Link: https://lore.kernel.org/r/20200608203818.189423-1-ndesaulniers@google.com Signed-off-by: Will Deacon Signed-off-by: linyujun Reviewed-by: chenlifu Reviewed-by: He Ying Signed-off-by: Laibin Qiu --- arch/arm64/include/asm/acpi.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index f364115ac2ef..348f12447ecd 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -33,14 +34,14 @@ * is therefore used to delimit the MADT GICC structure minimum length * appropriately. */ -#define ACPI_MADT_GICC_MIN_LENGTH ACPI_OFFSET( \ +#define ACPI_MADT_GICC_MIN_LENGTH offsetof( \ struct acpi_madt_generic_interrupt, efficiency_class) #define BAD_MADT_GICC_ENTRY(entry, end) \ (!(entry) || (entry)->header.length < ACPI_MADT_GICC_MIN_LENGTH || \ (unsigned long)(entry) + (entry)->header.length > (end)) -#define ACPI_MADT_GICC_SPE (ACPI_OFFSET(struct acpi_madt_generic_interrupt, \ +#define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \ spe_interrupt) + sizeof(u16)) /* Basic configuration for ACPI */ -- Gitee From fd9502211efce477cf42ba9c84f563c853479fa8 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 11 Mar 2022 16:54:34 +0800 Subject: [PATCH 011/340] net: hns3: fix pf vlan filter out of work after self test driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4W90Z CVE: NA ---------------------------- After a self test, the vlan filter of PF is wrongly closed, cause the vlan filter out of work. The second parameter of enable_vlan_filter() must pass true, and in enable_vlan_filter(), it will decide whether to open or keep the status of vlan filter. 
Fixes: 79549957e66f ("Revert: net: hns3: adds support for extended VLAN mode and 'QOS' in vlan 802.1Q protocol.") Signed-off-by: Yonglong Liu Reviewed-by: li yongxin Signed-off-by: Laibin Qiu --- drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c index ef870da7a8a4..be771c75e37e 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c @@ -395,8 +395,7 @@ static void hns3_self_test(struct net_device *ndev, #if IS_ENABLED(CONFIG_VLAN_8021Q) if (dis_vlan_filter) - h->ae_algo->ops->enable_vlan_filter(h, - ndev->flags & IFF_PROMISC); + h->ae_algo->ops->enable_vlan_filter(h, true); #endif if (if_running) -- Gitee From 70cd9d1e519bf3685b97adda9bbfa2b98790aef5 Mon Sep 17 00:00:00 2001 From: Jian Shen Date: Fri, 11 Mar 2022 16:54:35 +0800 Subject: [PATCH 012/340] net: hns3: fix RMW issue for VLAN filter switch mainline inclusion from mainline-v5.6-rc6 commit 903b85d3adce99a5301d5959c4d3c9d14a7974d4 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4W96A CVE: NA ---------------------------- According to the user manual, the ingress and egress VLAN filter are configured at the same time. Currently, hclge_init_vlan_config() and hclge_set_vlan_spoofchk() will both change the VLAN filter switch. So it's necessary to read the old configuration before modifying it. Fixes: fea4e7dc2790 ("net: hns3: add support for spoof check setting") Signed-off-by: Jian Shen Signed-off-by: Huazhong Tan Signed-off-by: David S. Miller Signed-off-by: Yonglong Liu Reviewed-by: li yongxin Signed-off-by: Laibin Qiu --- .../hisilicon/hns3/hns3pf/hclge_main.c | 20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c index b4c8f410e6f8..ca11b49c93a2 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c @@ -8324,16 +8324,30 @@ static int hclge_set_vlan_filter_ctrl(struct hclge_dev *hdev, u8 vlan_type, struct hclge_desc desc; int ret; - hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, false); + /* read current vlan filter parameter */ + hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_VLAN_FILTER_CTRL, true); req = (struct hclge_vlan_filter_ctrl_cmd *)desc.data; req->vlan_type = vlan_type; - req->vlan_fe = filter_en ? fe_type : 0; req->vf_id = vf_id; + ret = hclge_cmd_send(&hdev->hw, &desc, 1); + if (ret) { + dev_err(&hdev->pdev->dev, + "failed to get vport%u vlan filter config, ret = %d.\n", + vf_id, ret); + return ret; + } + + /* modify and write new config parameter */ + hclge_cmd_reuse_desc(&desc, false); + req->vlan_fe = filter_en ? 
+ (req->vlan_fe | fe_type) : (req->vlan_fe & ~fe_type); + ret = hclge_cmd_send(&hdev->hw, &desc, 1); if (ret) dev_err(&hdev->pdev->dev, - "set vlan filter fail, ret =%d.\n", ret); + "failed to set vport%u vlan filter, ret = %d.\n", + vf_id, ret); return ret; } -- Gitee From 7faf1f777a2a17fa874fb243f442e24115da69c0 Mon Sep 17 00:00:00 2001 From: Yonglong Liu Date: Fri, 11 Mar 2022 16:54:36 +0800 Subject: [PATCH 013/340] net: hns3: update hns3 version to 22.2.1 driver inclusion category: bugfix bugzilla: NA CVE: NA ---------------------------- Signed-off-by: Yonglong Liu Reviewed-by: li yongxin Signed-off-by: Laibin Qiu --- drivers/net/ethernet/hisilicon/hns3/hnae3.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_cae/hns3_cae_version.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 2 +- drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h index aebf0d272251..f8131e4e343a 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h +++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h @@ -32,7 +32,7 @@ #include #include -#define HNAE3_MOD_VERSION "21.12.4" +#define HNAE3_MOD_VERSION "22.2.1" #define HNAE3_MIN_VECTOR_NUM 2 /* first one for misc, another for IO */ diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_cae/hns3_cae_version.h b/drivers/net/ethernet/hisilicon/hns3/hns3_cae/hns3_cae_version.h index f3a905252deb..a52b964d953c 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_cae/hns3_cae_version.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_cae/hns3_cae_version.h @@ -4,7 +4,7 @@ #ifndef __HNS3_CAE_VERSION_H__ #define __HNS3_CAE_VERSION_H__ -#define HNS3_CAE_MOD_VERSION "21.12.4" +#define HNS3_CAE_MOD_VERSION "22.2.1" #define CMT_ID_LEN 8 #define RESV_LEN 3 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h index da140a3d568d..df7b9d3434ad 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h @@ -8,7 +8,7 @@ #include "hnae3.h" -#define HNS3_MOD_VERSION "21.12.4" +#define HNS3_MOD_VERSION "22.2.1" extern char hns3_driver_version[]; diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h index 81451ccb4140..027d9137f76d 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h @@ -12,7 +12,7 @@ #include "hclge_cmd.h" #include "hnae3.h" -#define HCLGE_MOD_VERSION "21.12.4" +#define HCLGE_MOD_VERSION "22.2.1" #define HCLGE_DRIVER_NAME "hclge" #define HCLGE_MAX_PF_NUM 8 diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h index 6880c4cb52f3..b2fd2a154625 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h @@ -10,7 +10,7 @@ #include "hclgevf_cmd.h" #include "hnae3.h" -#define HCLGEVF_MOD_VERSION "21.12.4" +#define HCLGEVF_MOD_VERSION "22.2.1" #define HCLGEVF_DRIVER_NAME "hclgevf" #define HCLGEVF_MAX_VLAN_ID 4095 -- Gitee From 69bb4bb982ad84140780b7cd5aa4498be6706e83 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 14 Mar 2022 09:57:23 +0800 Subject: [PATCH 014/340] net: skip virtio_net_hdr_set_proto if protocol already set stable 
inclusion from linux-4.19.223 commit 5fefa884a55bd5995e77f4fdd89f0400f42b39f7 -------------------------------- [ Upstream commit 1ed1d592113959f00cc552c3b9f47ca2d157768f ] virtio_net_hdr_set_proto infers skb->protocol from the virtio_net_hdr gso_type, to avoid packets getting dropped for lack of a proto type. Its protocol choice is a guess, especially in the case of UFO, where the single VIRTIO_NET_HDR_GSO_UDP label covers both UFOv4 and UFOv6. Skip this best effort if the field is already initialized. Whether explicitly from userspace, or implicitly based on an earlier call to dev_parse_header_protocol (which is more robust, but was introduced after this patch). Fixes: 9d2f67e43b73 ("net/packet: fix packet drop as of virtio gso") Signed-off-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211220145027.2784293-1-willemdebruijn.kernel@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- include/linux/virtio_net.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 3e2ee7739c11..a7c197299fc7 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -10,6 +10,9 @@ static inline int virtio_net_hdr_set_proto(struct sk_buff *skb, const struct virtio_net_hdr *hdr) { + if (skb->protocol) + return 0; + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { case VIRTIO_NET_HDR_GSO_TCPV4: case VIRTIO_NET_HDR_GSO_UDP: -- Gitee From a97abb624a6719f9834e68f0920e832ce3963e9b Mon Sep 17 00:00:00 2001 From: Wu Bo Date: Mon, 14 Mar 2022 09:57:24 +0800 Subject: [PATCH 015/340] ipmi: Fix UAF when uninstall ipmi_si and ipmi_msghandler module stable inclusion from linux-4.19.223 commit 925229d552724e1bba1abf01d3a0b1318539b012 -------------------------------- [ Upstream commit ffb76a86f8096a8206be03b14adda6092e18e275 ] Hi, When testing install and uninstall of ipmi_si.ko and ipmi_msghandler.ko, the system crashed. The log as follows: [ 141.087026] BUG: unable to handle kernel paging request at ffffffffc09b3a5a [ 141.087241] PGD 8fe4c0d067 P4D 8fe4c0d067 PUD 8fe4c0f067 PMD 103ad89067 PTE 0 [ 141.087464] Oops: 0010 [#1] SMP NOPTI [ 141.087580] CPU: 67 PID: 668 Comm: kworker/67:1 Kdump: loaded Not tainted 4.18.0.x86_64 #47 [ 141.088009] Workqueue: events 0xffffffffc09b3a40 [ 141.088009] RIP: 0010:0xffffffffc09b3a5a [ 141.088009] Code: Bad RIP value. [ 141.088009] RSP: 0018:ffffb9094e2c3e88 EFLAGS: 00010246 [ 141.088009] RAX: 0000000000000000 RBX: ffff9abfdb1f04a0 RCX: 0000000000000000 [ 141.088009] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000246 [ 141.088009] RBP: 0000000000000000 R08: ffff9abfffee3cb8 R09: 00000000000002e1 [ 141.088009] R10: ffffb9094cb73d90 R11: 00000000000f4240 R12: ffff9abfffee8700 [ 141.088009] R13: 0000000000000000 R14: ffff9abfdb1f04a0 R15: ffff9abfdb1f04a8 [ 141.088009] FS: 0000000000000000(0000) GS:ffff9abfffec0000(0000) knlGS:0000000000000000 [ 141.088009] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 141.088009] CR2: ffffffffc09b3a30 CR3: 0000008fe4c0a001 CR4: 00000000007606e0 [ 141.088009] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 141.088009] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 141.088009] PKRU: 55555554 [ 141.088009] Call Trace: [ 141.088009] ? process_one_work+0x195/0x390 [ 141.088009] ? worker_thread+0x30/0x390 [ 141.088009] ? process_one_work+0x390/0x390 [ 141.088009] ? kthread+0x10d/0x130 [ 141.088009] ? 
kthread_flush_work_fn+0x10/0x10 [ 141.088009] ? ret_from_fork+0x35/0x40] BUG: unable to handle kernel paging request at ffffffffc0b28a5a [ 200.223240] PGD 97fe00d067 P4D 97fe00d067 PUD 97fe00f067 PMD a580cbf067 PTE 0 [ 200.223464] Oops: 0010 [#1] SMP NOPTI [ 200.223579] CPU: 63 PID: 664 Comm: kworker/63:1 Kdump: loaded Not tainted 4.18.0.x86_64 #46 [ 200.224008] Workqueue: events 0xffffffffc0b28a40 [ 200.224008] RIP: 0010:0xffffffffc0b28a5a [ 200.224008] Code: Bad RIP value. [ 200.224008] RSP: 0018:ffffbf3c8e2a3e88 EFLAGS: 00010246 [ 200.224008] RAX: 0000000000000000 RBX: ffffa0799ad6bca0 RCX: 0000000000000000 [ 200.224008] RDX: 0000000000000000 RSI: 0000000000000246 RDI: 0000000000000246 [ 200.224008] RBP: 0000000000000000 R08: ffff9fe43fde3cb8 R09: 00000000000000d5 [ 200.224008] R10: ffffbf3c8cb53d90 R11: 00000000000f4240 R12: ffff9fe43fde8700 [ 200.224008] R13: 0000000000000000 R14: ffffa0799ad6bca0 R15: ffffa0799ad6bca8 [ 200.224008] FS: 0000000000000000(0000) GS:ffff9fe43fdc0000(0000) knlGS:0000000000000000 [ 200.224008] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 200.224008] CR2: ffffffffc0b28a30 CR3: 00000097fe00a002 CR4: 00000000007606e0 [ 200.224008] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 200.224008] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 [ 200.224008] PKRU: 55555554 [ 200.224008] Call Trace: [ 200.224008] ? process_one_work+0x195/0x390 [ 200.224008] ? worker_thread+0x30/0x390 [ 200.224008] ? process_one_work+0x390/0x390 [ 200.224008] ? kthread+0x10d/0x130 [ 200.224008] ? kthread_flush_work_fn+0x10/0x10 [ 200.224008] ? ret_from_fork+0x35/0x40 [ 200.224008] kernel fault(0x1) notification starting on CPU 63 [ 200.224008] kernel fault(0x1) notification finished on CPU 63 [ 200.224008] CR2: ffffffffc0b28a5a [ 200.224008] ---[ end trace c82a412d93f57412 ]--- The reason is as follows: T1: rmmod ipmi_si. ->ipmi_unregister_smi() -> ipmi_bmc_unregister() -> __ipmi_bmc_unregister() -> kref_put(&bmc->usecount, cleanup_bmc_device); -> schedule_work(&bmc->remove_work); T2: rmmod ipmi_msghandler. ipmi_msghander module uninstalled, and the module space will be freed. T3: bmc->remove_work doing cleanup the bmc resource. -> cleanup_bmc_work() -> platform_device_unregister(&bmc->pdev); -> platform_device_del(pdev); -> device_del(&pdev->dev); -> kobject_uevent(&dev->kobj, KOBJ_REMOVE); -> kobject_uevent_env() -> dev_uevent() -> if (dev->type && dev->type->name) 'dev->type'(bmc_device_type) pointer space has freed when uninstall ipmi_msghander module, 'dev->type->name' cause the system crash. drivers/char/ipmi/ipmi_msghandler.c: 2820 static const struct device_type bmc_device_type = { 2821 .groups = bmc_dev_attr_groups, 2822 }; Steps to reproduce: Add a time delay in cleanup_bmc_work() function, and uninstall ipmi_si and ipmi_msghandler module. 2910 static void cleanup_bmc_work(struct work_struct *work) 2911 { 2912 struct bmc_device *bmc = container_of(work, struct bmc_device, 2913 remove_work); 2914 int id = bmc->pdev.id; /* Unregister overwrites id */ 2915 2916 msleep(3000); <--- 2917 platform_device_unregister(&bmc->pdev); 2918 ida_simple_remove(&ipmi_bmc_ida, id); 2919 } Use 'remove_work_wq' instead of 'system_wq' to solve this issues. 
Fixes: b2cfd8ab4add ("ipmi: Rework device id and guid handling to catch changing BMCs") Signed-off-by: Wu Bo Message-Id: <1640070034-56671-1-git-send-email-wubo40@huawei.com> Signed-off-by: Corey Minyard Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/char/ipmi/ipmi_msghandler.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 272c34102875..19c33a86c1b8 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -2863,7 +2863,7 @@ cleanup_bmc_device(struct kref *ref) * with removing the device attributes while reading a device * attribute. */ - schedule_work(&bmc->remove_work); + queue_work(remove_work_wq, &bmc->remove_work); } /* -- Gitee From 4e06cf778e884ac021ec2ff1f42e98f4d893ac5d Mon Sep 17 00:00:00 2001 From: Fernando Fernandez Mancera Date: Mon, 14 Mar 2022 09:57:25 +0800 Subject: [PATCH 016/340] bonding: fix ad_actor_system option setting to default stable inclusion from linux-4.19.223 commit 4e4591490f8a930e11131514db4cce4201ffc8a5 -------------------------------- [ Upstream commit 1c15b05baea71a5ff98235783e3e4ad227760876 ] When 802.3ad bond mode is configured the ad_actor_system option is set to "00:00:00:00:00:00". But when trying to set the all-zeroes MAC as actors' system address it was failing with EINVAL. An all-zeroes ethernet address is valid, only multicast addresses are not valid values. Fixes: 171a42c38c6e ("bonding: add netlink support for sys prio, actor sys mac, and port key") Signed-off-by: Fernando Fernandez Mancera Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20211221111345.2462-1-ffmancera@riseup.net Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- Documentation/networking/bonding.txt | 11 ++++++----- drivers/net/bonding/bond_options.c | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt index e3abfbd32f71..b020e6ce6dd4 100644 --- a/Documentation/networking/bonding.txt +++ b/Documentation/networking/bonding.txt @@ -191,11 +191,12 @@ ad_actor_sys_prio ad_actor_system In an AD system, this specifies the mac-address for the actor in - protocol packet exchanges (LACPDUs). The value cannot be NULL or - multicast. It is preferred to have the local-admin bit set for this - mac but driver does not enforce it. If the value is not given then - system defaults to using the masters' mac address as actors' system - address. + protocol packet exchanges (LACPDUs). The value cannot be a multicast + address. If the all-zeroes MAC is specified, bonding will internally + use the MAC of the bond itself. It is preferred to have the + local-admin bit set for this mac but driver does not enforce it. If + the value is not given then system defaults to using the masters' + mac address as actors' system address. This parameter has effect only in 802.3ad mode and is available through SysFs interface. 
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 7e07e213f4c2..1745cba04c18 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1458,7 +1458,7 @@ static int bond_option_ad_actor_system_set(struct bonding *bond, mac = (u8 *)&newval->value; } - if (!is_valid_ether_addr(mac)) + if (is_multicast_ether_addr(mac)) goto err; netdev_dbg(bond->dev, "Setting ad_actor_system to %pM\n", mac); -- Gitee From fe5ad016b1b642ab450f61a689b0e943139fafe6 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 14 Mar 2022 09:57:26 +0800 Subject: [PATCH 017/340] ipmi: bail out if init_srcu_struct fails stable inclusion from linux-4.19.223 commit db0d90490674c297a2e15a1ca3e40cd6975c30dc -------------------------------- commit 2b5160b12091285c5aca45980f100a9294af7b04 upstream. In case, init_srcu_struct fails (because of memory allocation failure), we might proceed with the driver initialization despite srcu_struct not being entirely initialized. Fixes: 913a89f009d9 ("ipmi: Don't initialize anything in the core until something uses it") Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Corey Minyard Cc: stable@vger.kernel.org Message-Id: <20211217154410.1228673-1-cascardo@canonical.com> Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/char/ipmi/ipmi_msghandler.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 19c33a86c1b8..9b66afeb1237 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -5083,7 +5083,9 @@ static int ipmi_init_msghandler(void) if (initialized) goto out; - init_srcu_struct(&ipmi_interfaces_srcu); + rv = init_srcu_struct(&ipmi_interfaces_srcu); + if (rv) + goto out; timer_setup(&ipmi_timer, ipmi_timeout, 0); mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); -- Gitee From 12db569e13e64b94db22e806c79e75dc6ae4dc85 Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 14 Mar 2022 09:57:27 +0800 Subject: [PATCH 018/340] ipmi: fix initialization when workqueue allocation fails stable inclusion from linux-4.19.223 commit eb84855d3e8799b67cdbadc7a5c53997cbfc3580 -------------------------------- commit 75d70d76cb7b927cace2cb34265d68ebb3306b13 upstream. If the workqueue allocation fails, the driver is marked as not initialized, and timer and panic_notifier will be left registered. Instead of removing those when workqueue allocation fails, do the workqueue initialization before doing it, and cleanup srcu_struct if it fails. 
Fixes: 1d49eb91e86e ("ipmi: Move remove_work to dedicated workqueue") Signed-off-by: Thadeu Lima de Souza Cascardo Cc: Corey Minyard Cc: Ioanna Alifieraki Cc: stable@vger.kernel.org Message-Id: <20211217154410.1228673-2-cascardo@canonical.com> Signed-off-by: Corey Minyard Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/char/ipmi/ipmi_msghandler.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index 9b66afeb1237..83eaab66d2f6 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -5087,20 +5087,23 @@ static int ipmi_init_msghandler(void) if (rv) goto out; - timer_setup(&ipmi_timer, ipmi_timeout, 0); - mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); - - atomic_notifier_chain_register(&panic_notifier_list, &panic_block); - remove_work_wq = create_singlethread_workqueue("ipmi-msghandler-remove-wq"); if (!remove_work_wq) { pr_err("unable to create ipmi-msghandler-remove-wq workqueue"); rv = -ENOMEM; - goto out; + goto out_wq; } + timer_setup(&ipmi_timer, ipmi_timeout, 0); + mod_timer(&ipmi_timer, jiffies + IPMI_TIMEOUT_JIFFIES); + + atomic_notifier_chain_register(&panic_notifier_list, &panic_block); + initialized = true; +out_wq: + if (rv) + cleanup_srcu_struct(&ipmi_interfaces_srcu); out: mutex_unlock(&ipmi_interfaces_mutex); return rv; -- Gitee From 4d0a4dc8e07599df06cc2653e03132a30360c1df Mon Sep 17 00:00:00 2001 From: Andrew Cooper Date: Mon, 14 Mar 2022 09:57:28 +0800 Subject: [PATCH 019/340] x86/pkey: Fix undefined behaviour with PKRU_WD_BIT stable inclusion from linux-4.19.223 commit 94cc1e8331973ee7a2b4336f7ba67f48c2877d0a -------------------------------- commit 57690554abe135fee81d6ac33cc94d75a7e224bb upstream. Both __pkru_allows_write() and arch_set_user_pkey_access() shift PKRU_WD_BIT (a signed constant) by up to 30 bits, hitting the sign bit. Use unsigned constants instead. Clearly pkey 15 has not been used in combination with UBSAN yet. Noticed by code inspection only. I can't actually provoke the compiler into generating incorrect logic as far as this shift is concerned. [ dhansen: add stable@ tag, plus minor changelog massaging, For anyone doing backports, these #defines were in arch/x86/include/asm/pgtable.h before 784a46618f6. 
] Fixes: 33a709b25a76 ("mm/gup, x86/mm/pkeys: Check VMAs and PTEs for protection keys") Signed-off-by: Andrew Cooper Signed-off-by: Dave Hansen Signed-off-by: Borislav Petkov Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20211216000856.4480-1-andrew.cooper3@citrix.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- arch/x86/include/asm/pgtable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index fe6f9e7f15bb..1d79485ae972 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1353,8 +1353,8 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) #endif #endif -#define PKRU_AD_BIT 0x1 -#define PKRU_WD_BIT 0x2 +#define PKRU_AD_BIT 0x1u +#define PKRU_WD_BIT 0x2u #define PKRU_BITS_PER_PKEY 2 static inline bool __pkru_allows_read(u32 pkru, u16 pkey) -- Gitee From cf75bcdb67f6da74b4af8abf05d669b7e3e0fe18 Mon Sep 17 00:00:00 2001 From: Tom Rix Date: Mon, 14 Mar 2022 09:57:29 +0800 Subject: [PATCH 020/340] selinux: initialize proto variable in selinux_ip_postroute_compat() stable inclusion from linux-4.19.224 commit c34a0b5b3bf01062eb5f2a47870c5ab57256bdfc -------------------------------- commit 732bc2ff080c447f8524f40c970c481f5da6eed3 upstream. Clang static analysis reports this warning hooks.c:5765:6: warning: 4th function call argument is an uninitialized value if (selinux_xfrm_postroute_last(sksec->sid, skb, &ad, proto)) ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ selinux_parse_skb() can return ok without setting proto. The later call to selinux_xfrm_postroute_last() does an early check of proto and can return ok if the garbage proto value matches. So initialize proto. Cc: stable@vger.kernel.org Fixes: eef9b41622f2 ("selinux: cleanup selinux_xfrm_sock_rcv_skb() and selinux_xfrm_postroute_last()") Signed-off-by: Tom Rix [PM: typo/spelling and checkpatch.pl description fixes] Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- security/selinux/hooks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 1b611c068669..25353b4975c9 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -5799,7 +5799,7 @@ static unsigned int selinux_ip_postroute_compat(struct sk_buff *skb, struct common_audit_data ad; struct lsm_network_audit net = {0,}; char *addrp; - u8 proto; + u8 proto = 0; if (sk == NULL) return NF_ACCEPT; -- Gitee From 69563ca8ecb18b9536fd8e3e5512599dc58a04bc Mon Sep 17 00:00:00 2001 From: Coco Li Date: Mon, 14 Mar 2022 09:57:30 +0800 Subject: [PATCH 021/340] udp: using datalen to cap ipv6 udp max gso segments stable inclusion from linux-4.19.224 commit bbd9c7120c31c1aff6dcffdcce122773d163df72 -------------------------------- [ Upstream commit 736ef37fd9a44f5966e25319d08ff7ea99ac79e8 ] The max number of UDP gso segments is intended to cap to UDP_MAX_SEGMENTS, this is checked in udp_send_skb(). skb->len contains network and transport header len here, we should use only data len instead. 
This is the ipv6 counterpart to the below referenced commit, which missed the ipv6 change Fixes: 158390e45612 ("udp: using datalen to cap max gso segments") Signed-off-by: Coco Li Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20211223222441.2975883-1-lixiaoyan@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv6/udp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 581763d6df12..3ed5fe0055af 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1069,7 +1069,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->len > cork->gso_size * UDP_MAX_SEGMENTS) { + if (datalen > cork->gso_size * UDP_MAX_SEGMENTS) { kfree_skb(skb); return -EINVAL; } -- Gitee From a69ad5a8f7bf6178a78f337fb49f97e663fd90fa Mon Sep 17 00:00:00 2001 From: Muchun Song Date: Mon, 14 Mar 2022 09:57:31 +0800 Subject: [PATCH 022/340] net: fix use-after-free in tw_timer_handler stable inclusion from linux-4.19.224 commit a8e1944b44f94f5c5f530e434c5eaee787254566 -------------------------------- commit e22e45fc9e41bf9fcc1e92cfb78eb92786728ef0 upstream. A real world panic issue was found as follow in Linux 5.4. BUG: unable to handle page fault for address: ffffde49a863de28 PGD 7e6fe62067 P4D 7e6fe62067 PUD 7e6fe63067 PMD f51e064067 PTE 0 RIP: 0010:tw_timer_handler+0x20/0x40 Call Trace: call_timer_fn+0x2b/0x120 run_timer_softirq+0x1ef/0x450 __do_softirq+0x10d/0x2b8 irq_exit+0xc7/0xd0 smp_apic_timer_interrupt+0x68/0x120 apic_timer_interrupt+0xf/0x20 This issue was also reported since 2017 in the thread [1], unfortunately, the issue was still can be reproduced after fixing DCCP. The ipv4_mib_exit_net is called before tcp_sk_exit_batch when a net namespace is destroyed since tcp_sk_ops is registered befrore ipv4_mib_ops, which means tcp_sk_ops is in the front of ipv4_mib_ops in the list of pernet_list. There will be a use-after-free on net->mib.net_statistics in tw_timer_handler after ipv4_mib_exit_net if there are some inflight time-wait timers. This bug is not introduced by commit f2bf415cfed7 ("mib: add net to NET_ADD_STATS_BH") since the net_statistics is a global variable instead of dynamic allocation and freeing. Actually, commit 61a7e26028b9 ("mib: put net statistics on struct net") introduces the bug since it put net statistics on struct net and free it when net namespace is destroyed. Moving init_ipv4_mibs() to the front of tcp_init() to fix this bug and replace pr_crit() with panic() since continuing is meaningless when init_ipv4_mibs() fails. [1] https://groups.google.com/g/syzkaller/c/p1tn-_Kc6l4/m/smuL_FMAAgAJ?pli=1 Fixes: 61a7e26028b9 ("mib: put net statistics on struct net") Signed-off-by: Muchun Song Cc: Cong Wang Cc: Fam Zheng Cc: Link: https://lore.kernel.org/r/20211228104145.9426-1-songmuchun@bytedance.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv4/af_inet.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 14e10214cf87..84ce4f86bc19 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1955,6 +1955,10 @@ static int __init inet_init(void) ip_init(); + /* Initialise per-cpu ipv4 mibs */ + if (init_ipv4_mibs()) + panic("%s: Cannot init ipv4 mibs\n", __func__); + /* Setup TCP slab cache for open requests. 
*/ tcp_init(); @@ -1985,12 +1989,6 @@ static int __init inet_init(void) if (init_inet_pernet_ops()) pr_crit("%s: Cannot init ipv4 inet pernet ops\n", __func__); - /* - * Initialise per-cpu ipv4 mibs - */ - - if (init_ipv4_mibs()) - pr_crit("%s: Cannot init ipv4 mibs\n", __func__); ipv4_proc_init(); -- Gitee From 83dc9c3336ca51b73430e2d3c7fd4409cc4949bb Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Mar 2022 09:57:32 +0800 Subject: [PATCH 023/340] tracing: Fix check for trace_percpu_buffer validity in get_trace_buf() stable inclusion from linux-4.19.225 commit 24bb91f9536f79a13f85cfaae59be41fc8d9380d -------------------------------- commit 823e670f7ed616d0ce993075c8afe0217885f79d upstream. With the new osnoise tracer, we are seeing the below splat: Kernel attempted to read user page (c7d880000) - exploit attempt? (uid: 0) BUG: Unable to handle kernel data access on read at 0xc7d880000 Faulting instruction address: 0xc0000000002ffa10 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries ... NIP [c0000000002ffa10] __trace_array_vprintk.part.0+0x70/0x2f0 LR [c0000000002ff9fc] __trace_array_vprintk.part.0+0x5c/0x2f0 Call Trace: [c0000008bdd73b80] [c0000000001c49cc] put_prev_task_fair+0x3c/0x60 (unreliable) [c0000008bdd73be0] [c000000000301430] trace_array_printk_buf+0x70/0x90 [c0000008bdd73c00] [c0000000003178b0] trace_sched_switch_callback+0x250/0x290 [c0000008bdd73c90] [c000000000e70d60] __schedule+0x410/0x710 [c0000008bdd73d40] [c000000000e710c0] schedule+0x60/0x130 [c0000008bdd73d70] [c000000000030614] interrupt_exit_user_prepare_main+0x264/0x270 [c0000008bdd73de0] [c000000000030a70] syscall_exit_prepare+0x150/0x180 [c0000008bdd73e10] [c00000000000c174] system_call_vectored_common+0xf4/0x278 osnoise tracer on ppc64le is triggering osnoise_taint() for negative duration in get_int_safe_duration() called from trace_sched_switch_callback()->thread_exit(). The problem though is that the check for a valid trace_percpu_buffer is incorrect in get_trace_buf(). The check is being done after calculating the pointer for the current cpu, rather than on the main percpu pointer. Fix the check to be against trace_percpu_buffer. Link: https://lkml.kernel.org/r/a920e4272e0b0635cf20c444707cbce1b2c8973d.1640255304.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Fixes: e2ace001176dc9 ("tracing: Choose static tp_printk buffer by explicit nesting count") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- kernel/trace/trace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8f20a658dabb..5bd5eff5e0e0 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2796,7 +2796,7 @@ static char *get_trace_buf(void) { struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer); - if (!buffer || buffer->nesting >= 4) + if (!trace_percpu_buffer || buffer->nesting >= 4) return NULL; buffer->nesting++; -- Gitee From 22b3d9deb55b4479c4fa029b9bf29f28faba3d68 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Mon, 14 Mar 2022 09:57:33 +0800 Subject: [PATCH 024/340] tracing: Tag trace_percpu_buffer as a percpu pointer stable inclusion from linux-4.19.225 commit e9c3f28e2b9eaff371db65e4bd7ee170616147b4 -------------------------------- commit f28439db470cca8b6b082239314e9fd10bd39034 upstream. 
Tag trace_percpu_buffer as a percpu pointer to resolve warnings reported by sparse: /linux/kernel/trace/trace.c:3218:46: warning: incorrect type in initializer (different address spaces) /linux/kernel/trace/trace.c:3218:46: expected void const [noderef] __percpu *__vpp_verify /linux/kernel/trace/trace.c:3218:46: got struct trace_buffer_struct * /linux/kernel/trace/trace.c:3234:9: warning: incorrect type in initializer (different address spaces) /linux/kernel/trace/trace.c:3234:9: expected void const [noderef] __percpu *__vpp_verify /linux/kernel/trace/trace.c:3234:9: got int * Link: https://lkml.kernel.org/r/ebabd3f23101d89cb75671b68b6f819f5edc830b.1640255304.git.naveen.n.rao@linux.vnet.ibm.com Cc: stable@vger.kernel.org Reported-by: kernel test robot Fixes: 07d777fe8c398 ("tracing: Add percpu buffers for trace_printk()") Signed-off-by: Naveen N. Rao Signed-off-by: Steven Rostedt Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- kernel/trace/trace.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 5bd5eff5e0e0..119dd5fd5840 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2786,7 +2786,7 @@ struct trace_buffer_struct { char buffer[4][TRACE_BUF_SIZE]; }; -static struct trace_buffer_struct *trace_percpu_buffer; +static struct trace_buffer_struct __percpu *trace_percpu_buffer; /* * Thise allows for lockless recording. If we're nested too deeply, then @@ -2815,7 +2815,7 @@ static void put_trace_buf(void) static int alloc_percpu_trace_buffer(void) { - struct trace_buffer_struct *buffers; + struct trace_buffer_struct __percpu *buffers; buffers = alloc_percpu(struct trace_buffer_struct); if (WARN(!buffers, "Could not allocate percpu trace_printk buffer")) -- Gitee From 731268506da2f4f872a1a7ac999f2d3efc3a684f Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 14 Mar 2022 09:57:34 +0800 Subject: [PATCH 025/340] ipv6: Check attribute length for RTA_GATEWAY in multipath route stable inclusion from linux-4.19.225 commit 44c90b4e6bcbb10f4066b91e8da9c58d4609768c -------------------------------- commit 4619bcf91399f00a40885100fb61d594d8454033 upstream. Commit referenced in the Fixes tag used nla_memcpy for RTA_GATEWAY as does the current nla_get_in6_addr. nla_memcpy protects against accessing memory greater than what is in the attribute, but there is no check requiring the attribute to have an IPv6 address. Add it. Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)") Signed-off-by: David Ahern Cc: Nicolas Dichtel Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv6/route.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 01f8e62302fa..a680498b1178 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4424,6 +4424,19 @@ static void ip6_route_mpath_notify(struct fib6_info *rt, inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); } +static int fib6_gw_from_attr(struct in6_addr *gw, struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + if (nla_len(nla) < sizeof(*gw)) { + NL_SET_ERR_MSG(extack, "Invalid IPv6 address in RTA_GATEWAY"); + return -EINVAL; + } + + *gw = nla_get_in6_addr(nla); + + return 0; +} + static int ip6_route_multipath_add(struct fib6_config *cfg, struct netlink_ext_ack *extack) { @@ -4464,7 +4477,13 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - r_cfg.fc_gateway = nla_get_in6_addr(nla); + int ret; + + ret = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + extack); + if (ret) + return ret; + r_cfg.fc_flags |= RTF_GATEWAY; } r_cfg.fc_encap = nla_find(attrs, attrlen, RTA_ENCAP); -- Gitee From 524a79cfbf597f0b92419d7ed05aa019e325d625 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 14 Mar 2022 09:57:35 +0800 Subject: [PATCH 026/340] ipv6: Check attribute length for RTA_GATEWAY when deleting multipath route stable inclusion from linux-4.19.225 commit e30dead00aaa5a575b10117dad8edc126efc1873 -------------------------------- commit 1ff15a710a862db1101b97810af14aedc835a86a upstream. Make sure RTA_GATEWAY for IPv6 multipath route has enough bytes to hold an IPv6 address. Fixes: 6b9ea5a64ed5 ("ipv6: fix multipath route replace error recovery") Signed-off-by: David Ahern Cc: Roopa Prabhu Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv6/route.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a680498b1178..0db7a3ca668a 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4617,7 +4617,11 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - nla_memcpy(&r_cfg.fc_gateway, nla, 16); + err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + extack); + if (err) + return err; + r_cfg.fc_flags |= RTF_GATEWAY; } } -- Gitee From 84502b29bb87b0bcb6790222e19f48e72ecdd984 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 14 Mar 2022 09:57:36 +0800 Subject: [PATCH 027/340] ipv6: Continue processing multipath route even if gateway attribute is invalid stable inclusion from linux-4.19.225 commit e183929b7db0e84a48fdc009a7bcbf9137ad9061 -------------------------------- [ Upstream commit e30a845b0376eb51c9c94f56bbd53b2e08ba822f ] ip6_route_multipath_del loop continues processing the multipath attribute even if delete of a nexthop path fails. For consistency, do the same if the gateway attribute is invalid. 
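A condensed sketch of the intended control flow (simplified; parse_gateway() and del_one_nexthop() are stand-ins for the real helpers, not kernel symbols):

  while (rtnh_ok(rtnh, remaining)) {
          err = parse_gateway(rtnh);        /* may fail on a bad RTA_GATEWAY */
          if (err) {
                  last_err = err;           /* remember the failure ...      */
                  goto next_rtnh;           /* ... but keep walking the list */
          }
          err = del_one_nexthop(rtnh);
          if (err)
                  last_err = err;
  next_rtnh:
          rtnh = rtnh_next(rtnh, &remaining);
  }
  return last_err;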
Fixes: 1ff15a710a86 ("ipv6: Check attribute length for RTA_GATEWAY when deleting multipath route") Signed-off-by: David Ahern Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20220103171911.94739-1-dsahern@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv6/route.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0db7a3ca668a..e6b021588eb0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4619,8 +4619,10 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, if (nla) { err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, extack); - if (err) - return err; + if (err) { + last_err = err; + goto next_rtnh; + } r_cfg.fc_flags |= RTF_GATEWAY; } @@ -4629,6 +4631,7 @@ static int ip6_route_multipath_del(struct fib6_config *cfg, if (err) last_err = err; +next_rtnh: rtnh = rtnh_next(rtnh, &remaining); } -- Gitee From 96a7f55261aa69fce2594cd193d3e72950898692 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 14 Mar 2022 09:57:37 +0800 Subject: [PATCH 028/340] ipv6: Do cleanup if attribute validation fails in multipath route stable inclusion from linux-4.19.225 commit dd74b4e027324219e51d9821c0db4368c9f8526a -------------------------------- [ Upstream commit 95bdba23b5b4aa75fe3e6c84335e638641c707bb ] As Nicolas noted, if gateway validation fails walking the multipath attribute the code should jump to the cleanup to free previously allocated memory. Fixes: 1ff15a710a86 ("ipv6: Check attribute length for RTA_GATEWAY when deleting multipath route") Signed-off-by: David Ahern Acked-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20220103170555.94638-1-dsahern@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv6/route.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e6b021588eb0..80dd55c436fa 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4477,12 +4477,10 @@ static int ip6_route_multipath_add(struct fib6_config *cfg, nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - int ret; - - ret = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, + err = fib6_gw_from_attr(&r_cfg.fc_gateway, nla, extack); - if (ret) - return ret; + if (err) + goto cleanup; r_cfg.fc_flags |= RTF_GATEWAY; } -- Gitee From 453f6d74ffada5e8b76149619375f5a29f729d7a Mon Sep 17 00:00:00 2001 From: Lixiaokeng Date: Mon, 14 Mar 2022 09:57:38 +0800 Subject: [PATCH 029/340] scsi: libiscsi: Fix UAF in iscsi_conn_get_param()/iscsi_conn_teardown() stable inclusion from linux-4.19.225 commit f5596fdc65ce280cb4debb507c0c5877d1a89936 -------------------------------- [ Upstream commit 1b8d0300a3e9f216ae4901bab886db7299899ec6 ] |- iscsi_if_destroy_conn |-dev_attr_show |-iscsi_conn_teardown |-spin_lock_bh |-iscsi_sw_tcp_conn_get_param |-kfree(conn->persistent_address) |-iscsi_conn_get_param |-kfree(conn->local_ipaddr) ==>|-read persistent_address ==>|-read local_ipaddr |-spin_unlock_bh When iscsi_conn_teardown() and iscsi_conn_get_param() happen in parallel, a UAF may be triggered. Link: https://lore.kernel.org/r/046ec8a0-ce95-d3fc-3235-666a7c65b224@huawei.com Reported-by: Lu Tixiong Reviewed-by: Mike Christie Reviewed-by: Lee Duncan Signed-off-by: Lixiaokeng Signed-off-by: Linfeilong Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/scsi/libiscsi.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 02060aec08bf..a93a47d4931f 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -3133,6 +3133,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) { struct iscsi_conn *conn = cls_conn->dd_data; struct iscsi_session *session = conn->session; + char *tmp_persistent_address = conn->persistent_address; + char *tmp_local_ipaddr = conn->local_ipaddr; del_timer_sync(&conn->transport_timer); @@ -3156,8 +3158,6 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) spin_lock_bh(&session->frwd_lock); free_pages((unsigned long) conn->data, get_order(ISCSI_DEF_MAX_RECV_SEG_LEN)); - kfree(conn->persistent_address); - kfree(conn->local_ipaddr); /* regular RX path uses back_lock */ spin_lock_bh(&session->back_lock); kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task, @@ -3169,6 +3169,8 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) mutex_unlock(&session->eh_mutex); iscsi_destroy_conn(cls_conn); + kfree(tmp_persistent_address); + kfree(tmp_local_ipaddr); } EXPORT_SYMBOL_GPL(iscsi_conn_teardown); -- Gitee From fbdd31a66fd4702dbccdf3a0b6458246b7b1c5c5 Mon Sep 17 00:00:00 2001 From: William Zhao Date: Mon, 14 Mar 2022 09:57:39 +0800 Subject: [PATCH 030/340] ip6_vti: initialize __ip6_tnl_parm struct in vti6_siocdevprivate stable inclusion from linux-4.19.225 commit 6f1ce654347ad16977afcb315fd9a9114ed9739d -------------------------------- [ Upstream commit c1833c3964d5bd8c163bd4e01736a38bc473cb8a ] The "__ip6_tnl_parm" struct was left uninitialized causing an invalid load of random data when the "__ip6_tnl_parm" struct was used elsewhere. As an example, in the function "ip6_tnl_xmit_ctl()", it tries to access the "collect_md" member. With "__ip6_tnl_parm" being uninitialized and containing random data, the UBSAN detected that "collect_md" held a non-boolean value. The UBSAN issue is as follows: =============================================================== UBSAN: invalid-load in net/ipv6/ip6_tunnel.c:1025:14 load of value 30 is not a valid value for type '_Bool' CPU: 1 PID: 228 Comm: kworker/1:3 Not tainted 5.16.0-rc4+ #8 Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work Call Trace: dump_stack_lvl+0x44/0x57 ubsan_epilogue+0x5/0x40 __ubsan_handle_load_invalid_value+0x66/0x70 ? __cpuhp_setup_state+0x1d3/0x210 ip6_tnl_xmit_ctl.cold.52+0x2c/0x6f [ip6_tunnel] vti6_tnl_xmit+0x79c/0x1e96 [ip6_vti] ? lock_is_held_type+0xd9/0x130 ? vti6_rcv+0x100/0x100 [ip6_vti] ? lock_is_held_type+0xd9/0x130 ? rcu_read_lock_bh_held+0xc0/0xc0 ? lock_acquired+0x262/0xb10 dev_hard_start_xmit+0x1e6/0x820 __dev_queue_xmit+0x2079/0x3340 ? mark_lock.part.52+0xf7/0x1050 ? netdev_core_pick_tx+0x290/0x290 ? kvm_clock_read+0x14/0x30 ? kvm_sched_clock_read+0x5/0x10 ? sched_clock_cpu+0x15/0x200 ? find_held_lock+0x3a/0x1c0 ? lock_release+0x42f/0xc90 ? lock_downgrade+0x6b0/0x6b0 ? mark_held_locks+0xb7/0x120 ? neigh_connected_output+0x31f/0x470 ? lockdep_hardirqs_on+0x79/0x100 ? neigh_connected_output+0x31f/0x470 ? ip6_finish_output2+0x9b0/0x1d90 ? rcu_read_lock_bh_held+0x62/0xc0 ? ip6_finish_output2+0x9b0/0x1d90 ip6_finish_output2+0x9b0/0x1d90 ? ip6_append_data+0x330/0x330 ? ip6_mtu+0x166/0x370 ? __ip6_finish_output+0x1ad/0xfb0 ? nf_hook_slow+0xa6/0x170 ip6_output+0x1fb/0x710 ? 
nf_hook.constprop.32+0x317/0x430 ? ip6_finish_output+0x180/0x180 ? __ip6_finish_output+0xfb0/0xfb0 ? lock_is_held_type+0xd9/0x130 ndisc_send_skb+0xb33/0x1590 ? __sk_mem_raise_allocated+0x11cf/0x1560 ? dst_output+0x4a0/0x4a0 ? ndisc_send_rs+0x432/0x610 addrconf_dad_completed+0x30c/0xbb0 ? addrconf_rs_timer+0x650/0x650 ? addrconf_dad_work+0x73c/0x10e0 addrconf_dad_work+0x73c/0x10e0 ? addrconf_dad_completed+0xbb0/0xbb0 ? rcu_read_lock_sched_held+0xaf/0xe0 ? rcu_read_lock_bh_held+0xc0/0xc0 process_one_work+0x97b/0x1740 ? pwq_dec_nr_in_flight+0x270/0x270 worker_thread+0x87/0xbf0 ? process_one_work+0x1740/0x1740 kthread+0x3ac/0x490 ? set_kthread_struct+0x100/0x100 ret_from_fork+0x22/0x30 =============================================================== The solution is to initialize "__ip6_tnl_parm" struct to zeros in the "vti6_siocdevprivate()" function. Signed-off-by: William Zhao Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv6/ip6_vti.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 290badfe70e0..866ce815625e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -799,6 +799,8 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) struct net *net = dev_net(dev); struct vti6_net *ip6n = net_generic(net, vti6_net_id); + memset(&p1, 0, sizeof(p1)); + switch (cmd) { case SIOCGETTUNNEL: if (dev == ip6n->fb_tnl_dev) { -- Gitee From 31cad6a126132fc5266c830e25db495149d7c429 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 14 Mar 2022 09:57:40 +0800 Subject: [PATCH 031/340] can: bcm: switch timer to HRTIMER_MODE_SOFT and remove hrtimer_tasklet stable inclusion from linux-4.19.226 commit 79305a826f872fe446c6fbf8450f515053ef6951 -------------------------------- commit bf74aa86e111aa3b2fbb25db37e3a3fab71b5b68 upstream. This patch switches the timer to HRTIMER_MODE_SOFT, which executed the timer callback in softirq context and removes the hrtimer_tasklet. 
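In outline, the conversion follows this pattern (timer setup only; locking and the throttle timer are handled the same way in the diff below):

  /* before: the hard-irq hrtimer callback could only schedule a tasklet */
  hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  op->timer.function = bcm_tx_timeout_handler;   /* -> tasklet_schedule() */

  /* after: a _SOFT timer already runs its callback in softirq context,
   * so the handler does the work itself and re-arms by returning
   * HRTIMER_RESTART instead of deferring to a tasklet */
  hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
  op->timer.function = bcm_tx_timeout_handler;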
Signed-off-by: Thomas Gleixner Signed-off-by: Anna-Maria Gleixner Acked-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde Signed-off-by: Ziyang Xuan Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/can/bcm.c | 156 +++++++++++++++++--------------------------------- 1 file changed, 52 insertions(+), 104 deletions(-) diff --git a/net/can/bcm.c b/net/can/bcm.c index e66377e764ba..ffc34e911808 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -105,7 +105,6 @@ struct bcm_op { unsigned long frames_abs, frames_filtered; struct bcm_timeval ival1, ival2; struct hrtimer timer, thrtimer; - struct tasklet_struct tsklet, thrtsklet; ktime_t rx_stamp, kt_ival1, kt_ival2, kt_lastmsg; int rx_ifindex; int cfsiz; @@ -370,25 +369,34 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, } } -static void bcm_tx_start_timer(struct bcm_op *op) +static bool bcm_tx_set_expiry(struct bcm_op *op, struct hrtimer *hrt) { + ktime_t ival; + if (op->kt_ival1 && op->count) - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival1), - HRTIMER_MODE_ABS); + ival = op->kt_ival1; else if (op->kt_ival2) - hrtimer_start(&op->timer, - ktime_add(ktime_get(), op->kt_ival2), - HRTIMER_MODE_ABS); + ival = op->kt_ival2; + else + return false; + + hrtimer_set_expires(hrt, ktime_add(ktime_get(), ival)); + return true; } -static void bcm_tx_timeout_tsklet(unsigned long data) +static void bcm_tx_start_timer(struct bcm_op *op) { - struct bcm_op *op = (struct bcm_op *)data; + if (bcm_tx_set_expiry(op, &op->timer)) + hrtimer_start_expires(&op->timer, HRTIMER_MODE_ABS_SOFT); +} + +/* bcm_tx_timeout_handler - performs cyclic CAN frame transmissions */ +static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) +{ + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; if (op->kt_ival1 && (op->count > 0)) { - op->count--; if (!op->count && (op->flags & TX_COUNTEVT)) { @@ -406,22 +414,12 @@ static void bcm_tx_timeout_tsklet(unsigned long data) } bcm_can_tx(op); - } else if (op->kt_ival2) + } else if (op->kt_ival2) { bcm_can_tx(op); + } - bcm_tx_start_timer(op); -} - -/* - * bcm_tx_timeout_handler - performs cyclic CAN frame transmissions - */ -static enum hrtimer_restart bcm_tx_timeout_handler(struct hrtimer *hrtimer) -{ - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); - - tasklet_schedule(&op->tsklet); - - return HRTIMER_NORESTART; + return bcm_tx_set_expiry(op, &op->timer) ? 
+ HRTIMER_RESTART : HRTIMER_NORESTART; } /* @@ -488,7 +486,7 @@ static void bcm_rx_update_and_send(struct bcm_op *op, /* do not send the saved data - only start throttle timer */ hrtimer_start(&op->thrtimer, ktime_add(op->kt_lastmsg, op->kt_ival2), - HRTIMER_MODE_ABS); + HRTIMER_MODE_ABS_SOFT); return; } @@ -547,14 +545,21 @@ static void bcm_rx_starttimer(struct bcm_op *op) return; if (op->kt_ival1) - hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL); + hrtimer_start(&op->timer, op->kt_ival1, HRTIMER_MODE_REL_SOFT); } -static void bcm_rx_timeout_tsklet(unsigned long data) +/* bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out */ +static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) { - struct bcm_op *op = (struct bcm_op *)data; + struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); struct bcm_msg_head msg_head; + /* if user wants to be informed, when cyclic CAN-Messages come back */ + if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { + /* clear received CAN frames to indicate 'nothing received' */ + memset(op->last_frames, 0, op->nframes * op->cfsiz); + } + /* create notification to user */ memset(&msg_head, 0, sizeof(msg_head)); msg_head.opcode = RX_TIMEOUT; @@ -566,25 +571,6 @@ static void bcm_rx_timeout_tsklet(unsigned long data) msg_head.nframes = 0; bcm_send_to_user(op, &msg_head, NULL, 0); -} - -/* - * bcm_rx_timeout_handler - when the (cyclic) CAN frame reception timed out - */ -static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) -{ - struct bcm_op *op = container_of(hrtimer, struct bcm_op, timer); - - /* schedule before NET_RX_SOFTIRQ */ - tasklet_hi_schedule(&op->tsklet); - - /* no restart of the timer is done here! */ - - /* if user wants to be informed, when cyclic CAN-Messages come back */ - if ((op->flags & RX_ANNOUNCE_RESUME) && op->last_frames) { - /* clear received CAN frames to indicate 'nothing received' */ - memset(op->last_frames, 0, op->nframes * op->cfsiz); - } return HRTIMER_NORESTART; } @@ -592,14 +578,12 @@ static enum hrtimer_restart bcm_rx_timeout_handler(struct hrtimer *hrtimer) /* * bcm_rx_do_flush - helper for bcm_rx_thr_flush */ -static inline int bcm_rx_do_flush(struct bcm_op *op, int update, - unsigned int index) +static inline int bcm_rx_do_flush(struct bcm_op *op, unsigned int index) { struct canfd_frame *lcf = op->last_frames + op->cfsiz * index; if ((op->last_frames) && (lcf->flags & RX_THR)) { - if (update) - bcm_rx_changed(op, lcf); + bcm_rx_changed(op, lcf); return 1; } return 0; @@ -607,11 +591,8 @@ static inline int bcm_rx_do_flush(struct bcm_op *op, int update, /* * bcm_rx_thr_flush - Check for throttled data and send it to the userspace - * - * update == 0 : just check if throttled data is available (any irq context) - * update == 1 : check and send throttled data to userspace (soft_irq context) */ -static int bcm_rx_thr_flush(struct bcm_op *op, int update) +static int bcm_rx_thr_flush(struct bcm_op *op) { int updated = 0; @@ -620,24 +601,16 @@ static int bcm_rx_thr_flush(struct bcm_op *op, int update) /* for MUX filter we start at index 1 */ for (i = 1; i < op->nframes; i++) - updated += bcm_rx_do_flush(op, update, i); + updated += bcm_rx_do_flush(op, i); } else { /* for RX_FILTER_ID and simple filter */ - updated += bcm_rx_do_flush(op, update, 0); + updated += bcm_rx_do_flush(op, 0); } return updated; } -static void bcm_rx_thr_tsklet(unsigned long data) -{ - struct bcm_op *op = (struct bcm_op *)data; - - /* push the changed data to the userspace */ 
- bcm_rx_thr_flush(op, 1); -} - /* * bcm_rx_thr_handler - the time for blocked content updates is over now: * Check for throttled data and send it to the userspace @@ -646,9 +619,7 @@ static enum hrtimer_restart bcm_rx_thr_handler(struct hrtimer *hrtimer) { struct bcm_op *op = container_of(hrtimer, struct bcm_op, thrtimer); - tasklet_schedule(&op->thrtsklet); - - if (bcm_rx_thr_flush(op, 0)) { + if (bcm_rx_thr_flush(op)) { hrtimer_forward(hrtimer, ktime_get(), op->kt_ival2); return HRTIMER_RESTART; } else { @@ -744,23 +715,8 @@ static struct bcm_op *bcm_find_op(struct list_head *ops, static void bcm_remove_op(struct bcm_op *op) { - if (op->tsklet.func) { - while (test_bit(TASKLET_STATE_SCHED, &op->tsklet.state) || - test_bit(TASKLET_STATE_RUN, &op->tsklet.state) || - hrtimer_active(&op->timer)) { - hrtimer_cancel(&op->timer); - tasklet_kill(&op->tsklet); - } - } - - if (op->thrtsklet.func) { - while (test_bit(TASKLET_STATE_SCHED, &op->thrtsklet.state) || - test_bit(TASKLET_STATE_RUN, &op->thrtsklet.state) || - hrtimer_active(&op->thrtimer)) { - hrtimer_cancel(&op->thrtimer); - tasklet_kill(&op->thrtsklet); - } - } + hrtimer_cancel(&op->timer); + hrtimer_cancel(&op->thrtimer); if ((op->frames) && (op->frames != &op->sframe)) kfree(op->frames); @@ -994,15 +950,13 @@ static int bcm_tx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ - hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_tx_timeout_handler; - /* initialize tasklet for tx countevent notification */ - tasklet_init(&op->tsklet, bcm_tx_timeout_tsklet, - (unsigned long) op); - /* currently unused in tx_ops */ - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); /* add this bcm_op to the list of the tx_ops */ list_add(&op->list, &bo->tx_ops); @@ -1171,20 +1125,14 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, op->rx_ifindex = ifindex; /* initialize uninitialized (kzalloc) structure */ - hrtimer_init(&op->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); op->timer.function = bcm_rx_timeout_handler; - /* initialize tasklet for rx timeout notification */ - tasklet_init(&op->tsklet, bcm_rx_timeout_tsklet, - (unsigned long) op); - - hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer_init(&op->thrtimer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_SOFT); op->thrtimer.function = bcm_rx_thr_handler; - /* initialize tasklet for rx throttle handling */ - tasklet_init(&op->thrtsklet, bcm_rx_thr_tsklet, - (unsigned long) op); - /* add this bcm_op to the list of the rx_ops */ list_add(&op->list, &bo->rx_ops); @@ -1230,12 +1178,12 @@ static int bcm_rx_setup(struct bcm_msg_head *msg_head, struct msghdr *msg, */ op->kt_lastmsg = 0; hrtimer_cancel(&op->thrtimer); - bcm_rx_thr_flush(op, 1); + bcm_rx_thr_flush(op); } if ((op->flags & STARTTIMER) && op->kt_ival1) hrtimer_start(&op->timer, op->kt_ival1, - HRTIMER_MODE_REL); + HRTIMER_MODE_REL_SOFT); } /* now we can register for can_ids, if we added a new bcm_op */ -- Gitee From 6d8a0b1475a1327444198df5626f58dd650c414c Mon Sep 17 00:00:00 2001 From: Gang Li Date: Mon, 14 Mar 2022 09:57:41 +0800 Subject: [PATCH 032/340] shmem: fix a race between shmem_unused_huge_shrink and shmem_evict_inode stable inclusion from linux-4.19.226 
commit 9a3354fc78b7d7a79026053272bb0b74900c28c1 -------------------------------- commit 62c9827cbb996c2c04f615ecd783ce28bcea894b upstream. Fix a data race in commit 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure"). Here are call traces causing race: Call Trace 1: shmem_unused_huge_shrink+0x3ae/0x410 ? __list_lru_walk_one.isra.5+0x33/0x160 super_cache_scan+0x17c/0x190 shrink_slab.part.55+0x1ef/0x3f0 shrink_node+0x10e/0x330 kswapd+0x380/0x740 kthread+0xfc/0x130 ? mem_cgroup_shrink_node+0x170/0x170 ? kthread_create_on_node+0x70/0x70 ret_from_fork+0x1f/0x30 Call Trace 2: shmem_evict_inode+0xd8/0x190 evict+0xbe/0x1c0 do_unlinkat+0x137/0x330 do_syscall_64+0x76/0x120 entry_SYSCALL_64_after_hwframe+0x3d/0xa2 A simple explanation: Image there are 3 items in the local list (@list). In the first traversal, A is not deleted from @list. 1) A->B->C ^ | pos (leave) In the second traversal, B is deleted from @list. Concurrently, A is deleted from @list through shmem_evict_inode() since last reference counter of inode is dropped by other thread. Then the @list is corrupted. 2) A->B->C ^ ^ | | evict pos (drop) We should make sure the inode is either on the global list or deleted from any local list before iput(). Fixed by moving inodes back to global list before we put them. [akpm@linux-foundation.org: coding style fixes] Link: https://lkml.kernel.org/r/20211125064502.99983-1-ligang.bdlg@bytedance.com Fixes: 779750d20b93 ("shmem: split huge pages beyond i_size under memory pressure") Signed-off-by: Gang Li Reviewed-by: Muchun Song Acked-by: Kirill A. Shutemov Cc: Hugh Dickins Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- mm/shmem.c | 37 +++++++++++++++++++++---------------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/mm/shmem.c b/mm/shmem.c index bc62dc732781..4363dbc8d57e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -541,7 +541,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, struct shmem_inode_info *info; struct page *page; unsigned long batch = sc ? sc->nr_to_scan : 128; - int removed = 0, split = 0; + int split = 0; if (list_empty(&sbinfo->shrinklist)) return SHRINK_STOP; @@ -556,7 +556,6 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, /* inode is about to be evicted */ if (!inode) { list_del_init(&info->shrinklist); - removed++; goto next; } @@ -564,12 +563,12 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, if (round_up(inode->i_size, PAGE_SIZE) == round_up(inode->i_size, HPAGE_PMD_SIZE)) { list_move(&info->shrinklist, &to_remove); - removed++; goto next; } list_move(&info->shrinklist, &list); next: + sbinfo->shrinklist_len--; if (!--batch) break; } @@ -589,7 +588,7 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, inode = &info->vfs_inode; if (nr_to_split && split >= nr_to_split) - goto leave; + goto move_back; page = find_get_page(inode->i_mapping, (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT); @@ -603,38 +602,44 @@ static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo, } /* - * Leave the inode on the list if we failed to lock - * the page at this time. + * Move the inode on the list back to shrinklist if we failed + * to lock the page at this time. * * Waiting for the lock may lead to deadlock in the * reclaim path. 
*/ if (!trylock_page(page)) { put_page(page); - goto leave; + goto move_back; } ret = split_huge_page(page); unlock_page(page); put_page(page); - /* If split failed leave the inode on the list */ + /* If split failed move the inode on the list back to shrinklist */ if (ret) - goto leave; + goto move_back; split++; drop: list_del_init(&info->shrinklist); - removed++; -leave: + goto put; +move_back: + /* + * Make sure the inode is either on the global list or deleted + * from any local list before iput() since it could be deleted + * in another thread once we put the inode (then the local list + * is corrupted). + */ + spin_lock(&sbinfo->shrinklist_lock); + list_move(&info->shrinklist, &sbinfo->shrinklist); + sbinfo->shrinklist_len++; + spin_unlock(&sbinfo->shrinklist_lock); +put: iput(inode); } - spin_lock(&sbinfo->shrinklist_lock); - list_splice_tail(&list, &sbinfo->shrinklist); - sbinfo->shrinklist_len -= removed; - spin_unlock(&sbinfo->shrinklist_lock); - return split; } -- Gitee From 3e6f1f0c864b6ce739cfe25ac168f789a36633e9 Mon Sep 17 00:00:00 2001 From: Chengfeng Ye Date: Mon, 14 Mar 2022 09:57:42 +0800 Subject: [PATCH 033/340] crypto: qce - fix uaf on qce_ahash_register_one stable inclusion from linux-4.19.226 commit 1aa6bac2dc1bce5f6afbb095c0ace374cc653947 -------------------------------- [ Upstream commit b4cb4d31631912842eb7dce02b4350cbb7562d5e ] Pointer base points to sub field of tmpl, it is dereferenced after tmpl is freed. Fix this by accessing base before free tmpl. Fixes: ec8f5d8f ("crypto: qce - Qualcomm crypto engine driver") Signed-off-by: Chengfeng Ye Acked-by: Thara Gopinath Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/crypto/qce/sha.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/qce/sha.c b/drivers/crypto/qce/sha.c index d8a5db11b7ea..bffd4d15145d 100644 --- a/drivers/crypto/qce/sha.c +++ b/drivers/crypto/qce/sha.c @@ -521,8 +521,8 @@ static int qce_ahash_register_one(const struct qce_ahash_def *def, ret = crypto_register_ahash(alg); if (ret) { - kfree(tmpl); dev_err(qce->dev, "%s registration failed\n", base->cra_name); + kfree(tmpl); return ret; } -- Gitee From 9033611692c77c7b38bbfd5fa098850e3025b6f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 14 Mar 2022 09:57:43 +0800 Subject: [PATCH 034/340] netfilter: bridge: add support for pppoe filtering stable inclusion from linux-4.19.226 commit 519f563eca1b89965dd406a8f9807ca76562d202 -------------------------------- [ Upstream commit 28b78ecffea8078d81466b2e01bb5a154509f1ba ] This makes 'bridge-nf-filter-pppoe-tagged' sysctl work for bridged traffic. Looking at the original commit it doesn't appear this ever worked: static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, [..] if (skb->protocol == htons(ETH_P_8021Q)) { skb_pull(skb, VLAN_HLEN); skb->network_header += VLAN_HLEN; + } else if (skb->protocol == htons(ETH_P_PPP_SES)) { + skb_pull(skb, PPPOE_SES_HLEN); + skb->network_header += PPPOE_SES_HLEN; } [..] NF_HOOK(... POST_ROUTING, ...) ... but the adjusted offsets are never restored. The alternative would be to rip this code out for good, but otoh we'd have to keep this anyway for the vlan handling (which works because vlan tag info is in the skb, not the packet payload). 
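Conceptually the fix restores the pull/push symmetry around the bridged IP hooks (simplified sketch, not the exact call sites):

  if (skb->protocol == htons(ETH_P_PPP_SES))
          skb_pull(skb, PPPOE_SES_HLEN);     /* expose the inner IP header */

  /* ... bridged IPv4/IPv6 netfilter hooks run on the inner packet ... */

  nf_bridge_update_protocol(skb);            /* restore skb->protocol      */
  nf_bridge_push_encap_header(skb);          /* put the PPPoE/VLAN header
                                                back before MTU check/xmit */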
Reported-and-tested-by: Amish Chana Fixes: 516299d2f5b6f97 ("[NETFILTER]: bridge-nf: filter bridged IPv4/IPv6 encapsulated in pppoe traffic") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/bridge/br_netfilter_hooks.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index a403179eea49..fd5e3470560a 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -727,6 +727,9 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff if (nf_bridge->frag_max_size && nf_bridge->frag_max_size < mtu) mtu = nf_bridge->frag_max_size; + nf_bridge_update_protocol(skb); + nf_bridge_push_encap_header(skb); + if (skb_is_gso(skb) || skb->len + mtu_reserved <= mtu) { nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); @@ -744,8 +747,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff IPCB(skb)->frag_max_size = nf_bridge->frag_max_size; - nf_bridge_update_protocol(skb); - data = this_cpu_ptr(&brnf_frag_data_storage); data->vlan_tci = skb->vlan_tci; @@ -768,8 +769,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff IP6CB(skb)->frag_max_size = nf_bridge->frag_max_size; - nf_bridge_update_protocol(skb); - data = this_cpu_ptr(&brnf_frag_data_storage); data->encap_size = nf_bridge_encap_header_len(skb); data->size = ETH_HLEN + data->encap_size; -- Gitee From 00128133d5b8c69d9f09e7f3819b7e3c949e2fb4 Mon Sep 17 00:00:00 2001 From: Lizhi Hou Date: Mon, 14 Mar 2022 09:57:44 +0800 Subject: [PATCH 035/340] tty: serial: uartlite: allow 64 bit address stable inclusion from linux-4.19.226 commit 628c2372100a118f5500f596c5c941720adbcc28 -------------------------------- [ Upstream commit 3672fb65155530b5eea6225685c75329b6debec3 ] The base address of uartlite registers could be 64 bit address which is from device resource. When ulite_probe() calls ulite_assign(), this 64 bit address is casted to 32-bit. The fix is to replace "u32" type with "phys_addr_t" type for the base address in ulite_assign() argument list. 
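A standalone illustration (plain C, made-up address) of the truncation that the phys_addr_t parameter avoids:

  #include <stdio.h>
  #include <stdint.h>

  static void assign_u32(uint32_t base)  { printf("u32 base : %#lx\n", (unsigned long)base); }
  static void assign_phys(uint64_t base) { printf("full base: %#llx\n", (unsigned long long)base); }

  int main(void)
  {
          uint64_t res_start = 0x400001000ULL;   /* device resource above 4 GiB */

          assign_u32((uint32_t)res_start);       /* prints 0x1000 - upper bits lost */
          assign_phys(res_start);                /* prints 0x400001000              */
          return 0;
  }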
Fixes: 8fa7b6100693 ("[POWERPC] Uartlite: Separate the bus binding from the driver proper") Signed-off-by: Lizhi Hou Link: https://lore.kernel.org/r/20211129202302.1319033-1-lizhi.hou@xilinx.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/tty/serial/uartlite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/uartlite.c b/drivers/tty/serial/uartlite.c index 8df305822668..5d1b7455e627 100644 --- a/drivers/tty/serial/uartlite.c +++ b/drivers/tty/serial/uartlite.c @@ -618,7 +618,7 @@ static struct uart_driver ulite_uart_driver = { * * Returns: 0 on success, <0 otherwise */ -static int ulite_assign(struct device *dev, int id, u32 base, int irq, +static int ulite_assign(struct device *dev, int id, phys_addr_t base, int irq, struct uartlite_data *pdata) { struct uart_port *port; -- Gitee From ba8b1d7d1ec67cb3f27316bfcfb9a63e531be6f7 Mon Sep 17 00:00:00 2001 From: Lino Sanfilippo Date: Mon, 14 Mar 2022 09:57:45 +0800 Subject: [PATCH 036/340] serial: amba-pl011: do not request memory region twice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from linux-4.19.226 commit 0d9a1f0245ed6cadb37fabdf204b67f230160ca0 -------------------------------- [ Upstream commit d1180405c7b5c7a1c6bde79d5fc24fe931430737 ] With commit 3873e2d7f63a ("drivers: PL011: refactor pl011_probe()") the function devm_ioremap() called from pl011_setup_port() was replaced with devm_ioremap_resource(). Since this function not only remaps but also requests the ports io memory region it now collides with the .config_port() callback which requests the same region at uart port registration. Since devm_ioremap_resource() already claims the memory successfully, the request in .config_port() fails. Later at uart port deregistration the attempt to release the unclaimed memory also fails. The failure results in a “Trying to free nonexistent resource" warning. Fix these issues by removing the callbacks that implement the redundant memory allocation/release. Also make sure that changing the drivers io memory base address via TIOCSSERIAL is not allowed any more. Fixes: 3873e2d7f63a ("drivers: PL011: refactor pl011_probe()") Signed-off-by: Lino Sanfilippo Link: https://lore.kernel.org/r/20211129174238.8333-1-LinoSanfilippo@gmx.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/tty/serial/amba-pl011.c | 27 +++------------------------ 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/drivers/tty/serial/amba-pl011.c b/drivers/tty/serial/amba-pl011.c index 1d27e006826d..aae97acd183a 100644 --- a/drivers/tty/serial/amba-pl011.c +++ b/drivers/tty/serial/amba-pl011.c @@ -2160,32 +2160,13 @@ static const char *pl011_type(struct uart_port *port) return uap->port.type == PORT_AMBA ? uap->type : NULL; } -/* - * Release the memory region(s) being used by 'port' - */ -static void pl011_release_port(struct uart_port *port) -{ - release_mem_region(port->mapbase, SZ_4K); -} - -/* - * Request the memory region(s) being used by 'port' - */ -static int pl011_request_port(struct uart_port *port) -{ - return request_mem_region(port->mapbase, SZ_4K, "uart-pl011") - != NULL ? 0 : -EBUSY; -} - /* * Configure/autoconfigure the port. 
*/ static void pl011_config_port(struct uart_port *port, int flags) { - if (flags & UART_CONFIG_TYPE) { + if (flags & UART_CONFIG_TYPE) port->type = PORT_AMBA; - pl011_request_port(port); - } } /* @@ -2200,6 +2181,8 @@ static int pl011_verify_port(struct uart_port *port, struct serial_struct *ser) ret = -EINVAL; if (ser->baud_base < 9600) ret = -EINVAL; + if (port->mapbase != (unsigned long) ser->iomem_base) + ret = -EINVAL; return ret; } @@ -2217,8 +2200,6 @@ static const struct uart_ops amba_pl011_pops = { .flush_buffer = pl011_dma_flush_buffer, .set_termios = pl011_set_termios, .type = pl011_type, - .release_port = pl011_release_port, - .request_port = pl011_request_port, .config_port = pl011_config_port, .verify_port = pl011_verify_port, #ifdef CONFIG_CONSOLE_POLL @@ -2248,8 +2229,6 @@ static const struct uart_ops sbsa_uart_pops = { .shutdown = sbsa_uart_shutdown, .set_termios = sbsa_uart_set_termios, .type = pl011_type, - .release_port = pl011_release_port, - .request_port = pl011_request_port, .config_port = pl011_config_port, .verify_port = pl011_verify_port, #ifdef CONFIG_CONSOLE_POLL -- Gitee From cc97ecdb978753debcc99677f337aa231851d2f9 Mon Sep 17 00:00:00 2001 From: Li Hua Date: Mon, 14 Mar 2022 09:57:46 +0800 Subject: [PATCH 037/340] sched/rt: Try to restart rt period timer when rt runtime exceeded stable inclusion from linux-4.19.226 commit e6bc7279b16517fab9ed3cdbd58ad7b08060c246 -------------------------------- [ Upstream commit 9b58e976b3b391c0cf02e038d53dd0478ed3013c ] When rt_runtime is modified from -1 to a valid control value, it may cause the task to be throttled all the time. Operations like the following will trigger the bug. E.g: 1. echo -1 > /proc/sys/kernel/sched_rt_runtime_us 2. Run a FIFO task named A that executes while(1) 3. echo 950000 > /proc/sys/kernel/sched_rt_runtime_us When rt_runtime is -1, The rt period timer will not be activated when task A enqueued. And then the task will be throttled after setting rt_runtime to 950,000. The task will always be throttled because the rt period timer is not activated. 
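For reference, step 2 of the reproducer can be a minimal FIFO spinner like the one below (userspace, needs root; the priority value is an arbitrary choice for the test):

  #include <sched.h>
  #include <stdio.h>

  int main(void)
  {
          struct sched_param sp = { .sched_priority = 10 };

          if (sched_setscheduler(0, SCHED_FIFO, &sp)) {
                  perror("sched_setscheduler");
                  return 1;
          }
          for (;;)
                  ;       /* task A: busy loop, never blocks */
  }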
Fixes: d0b27fa77854 ("sched: rt-group: synchonised bandwidth period") Reported-by: Hulk Robot Signed-off-by: Li Hua Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20211203033618.11895-1-hucool.lihua@huawei.com Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- kernel/sched/rt.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 5a9c27956792..301ba04d9130 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -50,11 +50,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime) rt_b->rt_period_timer.function = sched_rt_period_timer; } -static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +static inline void do_start_rt_bandwidth(struct rt_bandwidth *rt_b) { - if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) - return; - raw_spin_lock(&rt_b->rt_runtime_lock); if (!rt_b->rt_period_active) { rt_b->rt_period_active = 1; @@ -72,6 +69,14 @@ static void start_rt_bandwidth(struct rt_bandwidth *rt_b) raw_spin_unlock(&rt_b->rt_runtime_lock); } +static void start_rt_bandwidth(struct rt_bandwidth *rt_b) +{ + if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) + return; + + do_start_rt_bandwidth(rt_b); +} + void init_rt_rq(struct rt_rq *rt_rq) { struct rt_prio_array *array; @@ -979,13 +984,17 @@ static void update_curr_rt(struct rq *rq) for_each_sched_rt_entity(rt_se) { struct rt_rq *rt_rq = rt_rq_of_se(rt_se); + int exceeded; if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { raw_spin_lock(&rt_rq->rt_runtime_lock); rt_rq->rt_time += delta_exec; - if (sched_rt_runtime_exceeded(rt_rq)) + exceeded = sched_rt_runtime_exceeded(rt_rq); + if (exceeded) resched_curr(rq); raw_spin_unlock(&rt_rq->rt_runtime_lock); + if (exceeded) + do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq)); } } } @@ -2654,8 +2663,12 @@ static int sched_rt_global_validate(void) static void sched_rt_do_global(void) { + unsigned long flags; + + raw_spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags); def_rt_bandwidth.rt_runtime = global_rt_runtime(); def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period()); + raw_spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags); } int sched_rt_handler(struct ctl_table *table, int write, -- Gitee From 406dab9bc0d0014f9019bba17d0beb8bff7bee27 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Mar 2022 09:57:47 +0800 Subject: [PATCH 038/340] xfrm: fix a small bug in xfrm_sa_len() stable inclusion from linux-4.19.226 commit fcb8f503d607d18b9fc69613e38358aae9f9fa99 -------------------------------- [ Upstream commit 7770a39d7c63faec6c4f33666d49a8cb664d0482 ] copy_user_offload() will actually push a struct struct xfrm_user_offload, which is different than (struct xfrm_state *)->xso (struct xfrm_state_offload) Fixes: d77e38e612a01 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/xfrm/xfrm_user.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f94abe1fdd58..87932f6ad9d7 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2813,7 +2813,7 @@ static inline unsigned int xfrm_sa_len(struct xfrm_state *x) if (x->props.extra_flags) l += nla_total_size(sizeof(x->props.extra_flags)); if (x->xso.dev) - l += nla_total_size(sizeof(x->xso)); + l += 
nla_total_size(sizeof(struct xfrm_user_offload)); if (x->props.smark.v | x->props.smark.m) { l += nla_total_size(sizeof(x->props.smark.v)); l += nla_total_size(sizeof(x->props.smark.m)); -- Gitee From e25cedfd876426cdd35d7d46b63ed3a82ab6ac14 Mon Sep 17 00:00:00 2001 From: Nicolas Toromanoff Date: Mon, 14 Mar 2022 09:57:48 +0800 Subject: [PATCH 039/340] crypto: stm32/cryp - fix double pm exit stable inclusion from linux-4.19.226 commit a803ac39fb163677b4eaf5317894a3dd62912b56 -------------------------------- [ Upstream commit 6c12e742785bf9333faf60bfb96575bdd763448e ] Delete extraneous lines in probe error handling code: pm was disabled twice. Fixes: 65f9aa36ee47 ("crypto: stm32/cryp - Add power management support") Reported-by: Marek Vasut Signed-off-by: Nicolas Toromanoff Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/crypto/stm32/stm32-cryp.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c index 23b0b7bd64c7..b3b49dce1136 100644 --- a/drivers/crypto/stm32/stm32-cryp.c +++ b/drivers/crypto/stm32/stm32-cryp.c @@ -2036,8 +2036,6 @@ static int stm32_cryp_probe(struct platform_device *pdev) list_del(&cryp->list); spin_unlock(&cryp_list.lock); - pm_runtime_disable(dev); - pm_runtime_put_noidle(dev); pm_runtime_disable(dev); pm_runtime_put_noidle(dev); -- Gitee From 68c0fe9e15296eab661ff7efc014c8c6e64dfa9d Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Mon, 14 Mar 2022 09:57:49 +0800 Subject: [PATCH 040/340] xfrm: interface with if_id 0 should return error stable inclusion from linux-4.19.226 commit f7594c07fb0270c5414460432d3344b8831789c2 -------------------------------- [ Upstream commit 8dce43919566f06e865f7e8949f5c10d8c2493f5 ] xfrm interface if_id = 0 would cause xfrm policy lookup errors since Commit 9f8550e4bd9d. Now explicitly fail to create an xfrm interface when if_id = 0 With this commit: ip link add ipsec0 type xfrm dev lo if_id 0 Error: if_id must be non zero. 
v1->v2 change: - add Fixes: tag Fixes: 9f8550e4bd9d ("xfrm: fix disable_xfrm sysctl when used on xfrm interfaces") Signed-off-by: Antony Antony Reviewed-by: Eyal Birger Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/xfrm/xfrm_interface.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 9dd43319b2c0..6bfd7b8249da 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -662,11 +662,16 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, struct netlink_ext_ack *extack) { struct net *net = dev_net(dev); - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; struct xfrm_if *xi; int err; xfrmi_netlink_parms(data, &p); + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (xi) return -EEXIST; @@ -691,7 +696,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], { struct xfrm_if *xi = netdev_priv(dev); struct net *net = xi->net; - struct xfrm_if_parms p; + struct xfrm_if_parms p = {}; + + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } xfrmi_netlink_parms(data, &p); xi = xfrmi_locate(net, &p); -- Gitee From 38af239f108f6a602f813712a39798f4425f3a86 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Mon, 14 Mar 2022 09:57:50 +0800 Subject: [PATCH 041/340] xfrm: state and policy should fail if XFRMA_IF_ID 0 stable inclusion from linux-4.19.226 commit a6ea2625df628d4d18b86f6ee21835c8962c8f23 -------------------------------- [ Upstream commit 68ac0f3810e76a853b5f7b90601a05c3048b8b54 ] xfrm ineterface does not allow xfrm if_id = 0 fail to create or update xfrm state and policy. 
With this commit: ip xfrm policy add src 192.0.2.1 dst 192.0.2.2 dir out if_id 0 RTNETLINK answers: Invalid argument ip xfrm state add src 192.0.2.1 dst 192.0.2.2 proto esp spi 1 \ reqid 1 mode tunnel aead 'rfc4106(gcm(aes))' \ 0x1111111111111111111111111111111111111111 96 if_id 0 RTNETLINK answers: Invalid argument v1->v2 change: - add Fixes: tag Fixes: 9f8550e4bd9d ("xfrm: fix disable_xfrm sysctl when used on xfrm interfaces") Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/xfrm/xfrm_user.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 87932f6ad9d7..8d8f9e778cd4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -620,8 +620,13 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, xfrm_smark_init(attrs, &x->props.smark); - if (attrs[XFRMA_IF_ID]) + if (attrs[XFRMA_IF_ID]) { x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (!x->if_id) { + err = -EINVAL; + goto error; + } + } err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]); if (err) @@ -1327,8 +1332,13 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, mark = xfrm_mark_get(attrs, &m); - if (attrs[XFRMA_IF_ID]) + if (attrs[XFRMA_IF_ID]) { if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (!if_id) { + err = -EINVAL; + goto out_noput; + } + } if (p->info.seq) { x = xfrm_find_acq_byseq(net, mark, p->info.seq); @@ -1630,8 +1640,13 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us xfrm_mark_get(attrs, &xp->mark); - if (attrs[XFRMA_IF_ID]) + if (attrs[XFRMA_IF_ID]) { xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + if (!xp->if_id) { + err = -EINVAL; + goto error; + } + } return xp; error: -- Gitee From 0f8a88115ed26bd9225626e5ebae5f23bb9cea0a Mon Sep 17 00:00:00 2001 From: Xin Xiong Date: Mon, 14 Mar 2022 09:57:51 +0800 Subject: [PATCH 042/340] netfilter: ipt_CLUSTERIP: fix refcount leak in clusterip_tg_check() stable inclusion from linux-4.19.226 commit f0351a2178fa64f87e609c341f07bf4260b10daf -------------------------------- [ Upstream commit d94a69cb2cfa77294921aae9afcfb866e723a2da ] The issue takes place in one error path of clusterip_tg_check(). When memcmp() returns nonzero, the function simply returns the error code, forgetting to decrease the reference count of a clusterip_config object, which is bumped earlier by clusterip_config_find_get(). This may incur reference count leak. Fix this issue by decrementing the refcount of the object in specific error path. 
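The error path after the fix follows the usual rule that a lookup which takes references must be balanced on every exit (sketch of the affected branch only; the earlier lookup is elided):

  /* 'config' came from clusterip_config_find_get() earlier in the function */
  if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) {
          clusterip_config_entry_put(config);   /* undo what find_get() took */
          clusterip_config_put(config);
          return -EINVAL;
  }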
Fixes: 06aa151ad1fc74 ("netfilter: ipt_CLUSTERIP: check MAC address when duplicate config is set") Signed-off-by: Xin Xiong Signed-off-by: Xiyu Yang Signed-off-by: Xin Tan Signed-off-by: Pablo Neira Ayuso Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 2c11050d679b..08297d3504cf 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -509,8 +509,11 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par) if (IS_ERR(config)) return PTR_ERR(config); } - } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) + } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) { + clusterip_config_entry_put(config); + clusterip_config_put(config); return -EINVAL; + } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { -- Gitee From eb30ebe770044b4a4ae36dfc45c3f550553a0bd5 Mon Sep 17 00:00:00 2001 From: Chen Jun Date: Mon, 14 Mar 2022 09:57:52 +0800 Subject: [PATCH 043/340] tpm: add request_locality before write TPM_INT_ENABLE stable inclusion from linux-4.19.226 commit f155b0fbde518857ee2d42f1fbfda6bdf0123363 -------------------------------- [ Upstream commit 0ef333f5ba7f24f5d8478425c163d3097f1c7afd ] Locality is not appropriately requested before writing the int mask. Add the missing boilerplate. Fixes: e6aef069b6e9 ("tpm_tis: convert to using locality callbacks") Signed-off-by: Chen Jun Reviewed-by: Jarkko Sakkinen Signed-off-by: Jarkko Sakkinen Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/char/tpm/tpm_tis_core.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/char/tpm/tpm_tis_core.c b/drivers/char/tpm/tpm_tis_core.c index 0aade05f47fa..a8e8289e0b35 100644 --- a/drivers/char/tpm/tpm_tis_core.c +++ b/drivers/char/tpm/tpm_tis_core.c @@ -918,7 +918,15 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq, intmask |= TPM_INTF_CMD_READY_INT | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT | TPM_INTF_STS_VALID_INT; intmask &= ~TPM_GLOBAL_INT_ENABLE; + + rc = request_locality(chip, 0); + if (rc < 0) { + rc = -ENODEV; + goto out_err; + } + tpm_tis_write32(priv, TPM_INT_ENABLE(priv->locality), intmask); + release_locality(chip, 0); rc = tpm2_probe(chip); if (rc) -- Gitee From caed2b609b8cc54ecb52b00c8b8f73148cfd66e3 Mon Sep 17 00:00:00 2001 From: Pavel Skripkin Date: Mon, 14 Mar 2022 09:57:53 +0800 Subject: [PATCH 044/340] net: mcs7830: handle usb read errors properly stable inclusion from linux-4.19.226 commit e0b2cd310d0859907a9a7b4e4ef9a5bcce9d3d6d -------------------------------- [ Upstream commit d668769eb9c52b150753f1653f7f5a0aeb8239d2 ] Syzbot reported uninit value in mcs7830_bind(). The problem was in missing validation check for bytes read via usbnet_read_cmd(). usbnet_read_cmd() internally calls usb_control_msg(), that returns number of bytes read. Code should validate that requested number of bytes was actually read. 
So, this patch adds missing size validation check inside mcs7830_get_reg() to prevent uninit value bugs Reported-and-tested-by: syzbot+003c0a286b9af5412510@syzkaller.appspotmail.com Fixes: 2a36d7083438 ("USB: driver for mcs7830 (aka DeLOCK) USB ethernet adapter") Signed-off-by: Pavel Skripkin Reviewed-by: Arnd Bergmann Link: https://lore.kernel.org/r/20220106225716.7425-1-paskripkin@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/net/usb/mcs7830.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c index 5a47e5510ca8..c0f52a622964 100644 --- a/drivers/net/usb/mcs7830.c +++ b/drivers/net/usb/mcs7830.c @@ -121,8 +121,16 @@ static const char driver_name[] = "MOSCHIP usb-ethernet driver"; static int mcs7830_get_reg(struct usbnet *dev, u16 index, u16 size, void *data) { - return usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ, - 0x0000, index, data, size); + int ret; + + ret = usbnet_read_cmd(dev, MCS7830_RD_BREQ, MCS7830_RD_BMREQ, + 0x0000, index, data, size); + if (ret < 0) + return ret; + else if (ret < size) + return -ENODATA; + + return ret; } static int mcs7830_set_reg(struct usbnet *dev, u16 index, u16 size, const void *data) -- Gitee From f19bc9786b7095592eb60ed4e425a35fee0f2b3f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 14 Mar 2022 09:57:54 +0800 Subject: [PATCH 045/340] ext4: avoid trim error on fs with small groups stable inclusion from linux-4.19.226 commit 1105c2dac8cb45c297401e32884cbefc0f123301 -------------------------------- [ Upstream commit 173b6e383d2a204c9921ffc1eca3b87aa2106c33 ] A user reported FITRIM ioctl failing for him on ext4 on some devices without apparent reason. After some debugging we've found out that these devices (being LVM volumes) report rather large discard granularity of 42MB and the filesystem had 1k blocksize and thus group size of 8MB. Because ext4 FITRIM implementation puts discard granularity into minlen, ext4_trim_fs() declared the trim request as invalid. However just silently doing nothing seems to be a more appropriate reaction to such combination of parameters since user did not specify anything wrong. 
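Rough numbers from the report above (assuming the usual 8192 blocks per group at 1k blocksize):

  group size          : 8192 blocks x 1 KiB = 8 MiB
  discard_granularity : 42 MiB -> minlen of roughly 43008 blocks
  minlen > blocks per group, so no free extent in any group can qualify;
  the request now trims nothing and returns success instead of -EINVAL.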
CC: Lukas Czerner Fixes: 5c2ed62fd447 ("ext4: Adjust minlen with discard_granularity in the FITRIM ioctl") Signed-off-by: Jan Kara Link: https://lore.kernel.org/r/20211112152202.26614-1-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/ext4/ioctl.c | 2 -- fs/ext4/mballoc.c | 8 ++++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index c6cac2d40f71..3c8324715e65 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1056,8 +1056,6 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) sizeof(range))) return -EFAULT; - range.minlen = max((unsigned int)range.minlen, - q->limits.discard_granularity); ret = ext4_trim_fs(sb, &range); if (ret < 0) return ret; diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 69ed137d07a9..990fe7eed6bf 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -5262,6 +5262,7 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group, */ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) { + struct request_queue *q = bdev_get_queue(sb->s_bdev); struct ext4_group_info *grp; ext4_group_t group, first_group, last_group; ext4_grpblk_t cnt = 0, first_cluster, last_cluster; @@ -5280,6 +5281,13 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) start >= max_blks || range->len < sb->s_blocksize) return -EINVAL; + /* No point to try to trim less than discard granularity */ + if (range->minlen < q->limits.discard_granularity) { + minlen = EXT4_NUM_B2C(EXT4_SB(sb), + q->limits.discard_granularity >> sb->s_blocksize_bits); + if (minlen > EXT4_CLUSTERS_PER_GROUP(sb)) + goto out; + } if (end >= max_blks) end = max_blks - 1; if (end <= first_data_blk) -- Gitee From 9ea992512930a42a43860b268679ff5d05909710 Mon Sep 17 00:00:00 2001 From: Hector Martin Date: Mon, 14 Mar 2022 09:57:55 +0800 Subject: [PATCH 046/340] iommu/io-pgtable-arm: Fix table descriptor paddr formatting stable inclusion from linux-4.19.226 commit 4208239d748b5a8f32d5c2323d6f385dcb7f9b9e -------------------------------- [ Upstream commit 9abe2ac834851a7d0b0756e295cf7a292c45ca53 ] Table descriptors were being installed without properly formatting the address using paddr_to_iopte, which does not match up with the iopte_deref in __arm_lpae_map. This is incorrect for the LPAE pte format, as it does not handle the high bits properly. This was found on Apple T6000 DARTs, which require a new pte format (different shift); adding support for that to paddr_to_iopte/iopte_to_paddr caused it to break badly, as even <48-bit addresses would end up incorrect in that case. 
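The essence of the fix (taken from the hunk below) is to format the table pointer with paddr_to_iopte() so that it matches what iopte_deref()/iopte_to_paddr() expect when the table is walked later:

    new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE;
    if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)
        new |= ARM_LPAE_PTE_NSTABLE;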
Fixes: 6c89928ff7a0 ("iommu/io-pgtable-arm: Support 52-bit physical address") Acked-by: Robin Murphy Signed-off-by: Hector Martin Link: https://lore.kernel.org/r/20211120031343.88034-1-marcan@marcan.st Signed-off-by: Joerg Roedel Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/iommu/io-pgtable-arm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index efd6e994678f..3f99077b3611 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -299,11 +299,12 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, static arm_lpae_iopte arm_lpae_install_table(arm_lpae_iopte *table, arm_lpae_iopte *ptep, arm_lpae_iopte curr, - struct io_pgtable_cfg *cfg) + struct arm_lpae_io_pgtable *data) { arm_lpae_iopte old, new; + struct io_pgtable_cfg *cfg = &data->iop.cfg; - new = __pa(table) | ARM_LPAE_PTE_TYPE_TABLE; + new = paddr_to_iopte(__pa(table), data) | ARM_LPAE_PTE_TYPE_TABLE; if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) new |= ARM_LPAE_PTE_NSTABLE; @@ -354,7 +355,7 @@ static int __arm_lpae_map(struct arm_lpae_io_pgtable *data, unsigned long iova, if (!cptep) return -ENOMEM; - pte = arm_lpae_install_table(cptep, ptep, 0, cfg); + pte = arm_lpae_install_table(cptep, ptep, 0, data); if (pte) __arm_lpae_free_pages(cptep, tblsz, cfg); } else if (!cfg->coherent_walk && !(pte & ARM_LPAE_PTE_SW_SYNC)) { @@ -513,7 +514,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, __arm_lpae_init_pte(data, blk_paddr, pte, lvl, &tablep[i]); } - pte = arm_lpae_install_table(tablep, ptep, blk_pte, cfg); + pte = arm_lpae_install_table(tablep, ptep, blk_pte, data); if (pte != blk_pte) { __arm_lpae_free_pages(tablep, tablesz, cfg); /* -- Gitee From 878f72d43745fa236696de929b8f84f614c74a1d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 14 Mar 2022 09:57:56 +0800 Subject: [PATCH 047/340] scsi: ufs: Fix race conditions related to driver data stable inclusion from linux-4.19.226 commit 5e7fb994ccc7200e20d7f394aa3f082f4efb49ab -------------------------------- [ Upstream commit 21ad0e49085deb22c094f91f9da57319a97188e4 ] The driver data pointer must be set before any callbacks are registered that use that pointer. Hence move the initialization of that pointer from after the ufshcd_init() call to inside ufshcd_init(). Link: https://lore.kernel.org/r/20211203231950.193369-7-bvanassche@acm.org Fixes: 3b1d05807a9a ("[SCSI] ufs: Segregate PCI Specific Code") Reported-by: Alexey Dobriyan Tested-by: Bean Huo Reviewed-by: Bean Huo Signed-off-by: Bart Van Assche Signed-off-by: Martin K. 
Petersen Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/scsi/ufs/tc-dwc-g210-pci.c | 1 - drivers/scsi/ufs/ufshcd-pltfrm.c | 2 -- drivers/scsi/ufs/ufshcd.c | 7 +++++++ 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/ufs/tc-dwc-g210-pci.c b/drivers/scsi/ufs/tc-dwc-g210-pci.c index 2f41722a8c28..2c6cb7f6b61a 100644 --- a/drivers/scsi/ufs/tc-dwc-g210-pci.c +++ b/drivers/scsi/ufs/tc-dwc-g210-pci.c @@ -138,7 +138,6 @@ tc_dwc_g210_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id) return err; } - pci_set_drvdata(pdev, hba); pm_runtime_put_noidle(&pdev->dev); pm_runtime_allow(&pdev->dev); diff --git a/drivers/scsi/ufs/ufshcd-pltfrm.c b/drivers/scsi/ufs/ufshcd-pltfrm.c index 30c22e16b1e3..57985841a879 100644 --- a/drivers/scsi/ufs/ufshcd-pltfrm.c +++ b/drivers/scsi/ufs/ufshcd-pltfrm.c @@ -348,8 +348,6 @@ int ufshcd_pltfrm_init(struct platform_device *pdev, goto dealloc_host; } - platform_set_drvdata(pdev, hba); - pm_runtime_set_active(&pdev->dev); pm_runtime_enable(&pdev->dev); diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c index 3d9bfa520768..2082907bea50 100644 --- a/drivers/scsi/ufs/ufshcd.c +++ b/drivers/scsi/ufs/ufshcd.c @@ -8032,6 +8032,13 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) struct Scsi_Host *host = hba->host; struct device *dev = hba->dev; + /* + * dev_set_drvdata() must be called before any callbacks are registered + * that use dev_get_drvdata() (frequency scaling, clock scaling, hwmon, + * sysfs). + */ + dev_set_drvdata(dev, hba); + if (!mmio_base) { dev_err(hba->dev, "Invalid memory reference for mmio_base is NULL\n"); -- Gitee From 15ff35bb61a1c972f98eb8c74680be24de4c2c5d Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 14 Mar 2022 09:57:57 +0800 Subject: [PATCH 048/340] dmaengine: pxa/mmp: stop referencing config->slave_id stable inclusion from linux-4.19.226 commit c976850fa06cca6675f24fdd5b44cb13ea9d7189 -------------------------------- [ Upstream commit 134c37fa250a87a7e77c80a7c59ae16c462e46e0 ] The last driver referencing the slave_id on Marvell PXA and MMP platforms was the SPI driver, but this stopped doing so a long time ago, so the TODO from the earlier patch can no be removed. Fixes: b729bf34535e ("spi/pxa2xx: Don't use slave_id of dma_slave_config") Fixes: 13b3006b8ebd ("dma: mmp_pdma: add filter function") Signed-off-by: Arnd Bergmann Acked-by: Mark Brown Link: https://lore.kernel.org/r/20211122222203.4103644-7-arnd@kernel.org Signed-off-by: Vinod Koul Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/dma/mmp_pdma.c | 6 ------ drivers/dma/pxa_dma.c | 7 ------- 2 files changed, 13 deletions(-) diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c index eb3a1f42ab06..e8b2d3e31de8 100644 --- a/drivers/dma/mmp_pdma.c +++ b/drivers/dma/mmp_pdma.c @@ -722,12 +722,6 @@ static int mmp_pdma_config(struct dma_chan *dchan, chan->dir = cfg->direction; chan->dev_addr = addr; - /* FIXME: drivers should be ported over to use the filter - * function. Once that's done, the following two lines can - * be removed. 
- */ - if (cfg->slave_id) - chan->drcmr = cfg->slave_id; return 0; } diff --git a/drivers/dma/pxa_dma.c b/drivers/dma/pxa_dma.c index b31c28b67ad3..c54986902b9d 100644 --- a/drivers/dma/pxa_dma.c +++ b/drivers/dma/pxa_dma.c @@ -960,13 +960,6 @@ static void pxad_get_config(struct pxad_chan *chan, *dcmd |= PXA_DCMD_BURST16; else if (maxburst == 32) *dcmd |= PXA_DCMD_BURST32; - - /* FIXME: drivers should be ported over to use the filter - * function. Once that's done, the following two lines can - * be removed. - */ - if (chan->cfg.slave_id) - chan->drcmr = chan->cfg.slave_id; } static struct dma_async_tx_descriptor * -- Gitee From 4407f10ebb0399bada9ba4e1b94c712bb1ab4f2b Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Mon, 14 Mar 2022 09:57:58 +0800 Subject: [PATCH 049/340] net-sysfs: update the queue counts in the unregistration path stable inclusion from linux-4.19.226 commit 35cad2003b6447932cfe91f795090586306738e8 -------------------------------- [ Upstream commit d7dac083414eb5bb99a6d2ed53dc2c1b405224e5 ] When updating Rx and Tx queue kobjects, the queue count should always be updated to match the queue kobjects count. This was not done in the net device unregistration path, fix it. Tracking all queue count updates will allow in a following up patch to detect illegal updates. Signed-off-by: Antoine Tenart Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/core/net-sysfs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index fe0d255d66c8..e5dc04cb5599 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1616,6 +1616,9 @@ static void remove_queue_kobjects(struct net_device *dev) net_rx_queue_update_kobjects(dev, real_rx, 0); netdev_queue_update_kobjects(dev, real_tx, 0); + + dev->real_num_rx_queues = 0; + dev->real_num_tx_queues = 0; #ifdef CONFIG_SYSFS kset_unregister(dev->queues_kset); #endif -- Gitee From 7ff83febc9ff2447df1bdf19eab137006239475a Mon Sep 17 00:00:00 2001 From: Suresh Kumar Date: Mon, 14 Mar 2022 09:57:59 +0800 Subject: [PATCH 050/340] net: bonding: debug: avoid printing debug logs when bond is not notifying peers stable inclusion from linux-4.19.226 commit 4cca06db20796f60ad83943b80664ef0d652dcac -------------------------------- [ Upstream commit fee32de284ac277ba434a2d59f8ce46528ff3946 ] Currently "bond_should_notify_peers: slave ..." messages are printed whenever "bond_should_notify_peers" function is called. +++ Dec 12 12:33:26 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 Dec 12 12:33:26 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 Dec 12 12:33:26 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 Dec 12 12:33:26 node1 kernel: bond0: (slave enp0s25): Received LACPDU on port 1 Dec 12 12:33:26 node1 kernel: bond0: (slave enp0s25): Rx Machine: Port=1, Last State=6, Curr State=6 Dec 12 12:33:26 node1 kernel: bond0: (slave enp0s25): partner sync=1 Dec 12 12:33:26 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 Dec 12 12:33:26 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 Dec 12 12:33:26 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 ... 
Dec 12 12:33:30 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 Dec 12 12:33:30 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 Dec 12 12:33:30 node1 kernel: bond0: (slave enp4s3): Received LACPDU on port 2 Dec 12 12:33:30 node1 kernel: bond0: (slave enp4s3): Rx Machine: Port=2, Last State=6, Curr State=6 Dec 12 12:33:30 node1 kernel: bond0: (slave enp4s3): partner sync=1 Dec 12 12:33:30 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 Dec 12 12:33:30 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 Dec 12 12:33:30 node1 kernel: bond0: bond_should_notify_peers: slave enp0s25 +++ This is confusing and can also clutter up debug logs. Print logs only when the peer notification happens. Signed-off-by: Suresh Kumar Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/net/bonding/bond_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c952ab169e4e..f4d40e983e99 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -782,9 +782,6 @@ static bool bond_should_notify_peers(struct bonding *bond) slave = rcu_dereference(bond->curr_active_slave); rcu_read_unlock(); - netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n", - slave ? slave->dev->name : "NULL"); - if (!slave || !bond->send_peer_notif || bond->send_peer_notif % max(1, bond->params.peer_notif_delay) != 0 || @@ -792,6 +789,9 @@ static bool bond_should_notify_peers(struct bonding *bond) test_bit(__LINK_STATE_LINKWATCH_PENDING, &slave->dev->state)) return false; + netdev_dbg(bond->dev, "bond_should_notify_peers: slave %s\n", + slave ? slave->dev->name : "NULL"); + return true; } -- Gitee From cd2a66cec92953ac6159dd1f8f66072eb7e44e85 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 14 Mar 2022 09:58:00 +0800 Subject: [PATCH 051/340] bpf: Do not WARN in bpf_warn_invalid_xdp_action() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from linux-4.19.226 commit f6d5eb174ee9b9ca4911b98f243f149fbfd19a43 -------------------------------- [ Upstream commit 2cbad989033bff0256675c38f96f5faab852af4b ] The WARN_ONCE() in bpf_warn_invalid_xdp_action() can be triggered by any bugged program, and even attaching a correct program to a NIC not supporting the given action. The resulting splat, beyond polluting the logs, fouls automated tools: e.g. a syzkaller reproducers using an XDP program returning an unsupported action will never pass validation. Replace the WARN_ONCE with a less intrusive pr_warn_once(). Signed-off-by: Paolo Abeni Signed-off-by: Daniel Borkmann Acked-by: Toke Høiland-Jørgensen Link: https://lore.kernel.org/bpf/016ceec56e4817ebb2a9e35ce794d5c917df572c.1638189075.git.pabeni@redhat.com Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/core/filter.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 5f24a6b82802..0730395918e0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -5528,9 +5528,9 @@ void bpf_warn_invalid_xdp_action(u32 act) { const u32 act_max = XDP_REDIRECT; - WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n", - act > act_max ? "Illegal" : "Driver unsupported", - act); + pr_warn_once("%s XDP return value %u, expect packet loss!\n", + act > act_max ? 
"Illegal" : "Driver unsupported", + act); } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -- Gitee From 5ccb974b936e362905267829ae33d209283d974a Mon Sep 17 00:00:00 2001 From: Kyeong Yoo Date: Mon, 14 Mar 2022 09:58:01 +0800 Subject: [PATCH 052/340] jffs2: GC deadlock reading a page that is used in jffs2_write_begin() stable inclusion from linux-4.19.226 commit c0b59abaf8effbac022f6d8b526c4b0b620920b3 -------------------------------- [ Upstream commit aa39cc675799bc92da153af9a13d6f969c348e82 ] GC task can deadlock in read_cache_page() because it may attempt to release a page that is actually allocated by another task in jffs2_write_begin(). The reason is that in jffs2_write_begin() there is a small window a cache page is allocated for use but not set Uptodate yet. This ends up with a deadlock between two tasks: 1) A task (e.g. file copy) - jffs2_write_begin() locks a cache page - jffs2_write_end() tries to lock "alloc_sem" from jffs2_reserve_space() <-- STUCK 2) GC task (jffs2_gcd_mtd3) - jffs2_garbage_collect_pass() locks "alloc_sem" - try to lock the same cache page in read_cache_page() <-- STUCK So to avoid this deadlock, hold "alloc_sem" in jffs2_write_begin() while reading data in a cache page. Signed-off-by: Kyeong Yoo Signed-off-by: Richard Weinberger Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/jffs2/file.c | 40 +++++++++++++++++++++++++--------------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 7d8654a1472e..3047872fdac9 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -135,20 +135,15 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, struct page *pg; struct inode *inode = mapping->host; struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode); + struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); pgoff_t index = pos >> PAGE_SHIFT; uint32_t pageofs = index << PAGE_SHIFT; int ret = 0; - pg = grab_cache_page_write_begin(mapping, index, flags); - if (!pg) - return -ENOMEM; - *pagep = pg; - jffs2_dbg(1, "%s()\n", __func__); if (pageofs > inode->i_size) { /* Make new hole frag from old EOF to new page */ - struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb); struct jffs2_raw_inode ri; struct jffs2_full_dnode *fn; uint32_t alloc_len; @@ -159,7 +154,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, ret = jffs2_reserve_space(c, sizeof(ri), &alloc_len, ALLOC_NORMAL, JFFS2_SUMMARY_INODE_SIZE); if (ret) - goto out_page; + goto out_err; mutex_lock(&f->sem); memset(&ri, 0, sizeof(ri)); @@ -189,7 +184,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, ret = PTR_ERR(fn); jffs2_complete_reservation(c); mutex_unlock(&f->sem); - goto out_page; + goto out_err; } ret = jffs2_add_full_dnode_to_inode(c, f, fn); if (f->metadata) { @@ -204,13 +199,26 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, jffs2_free_full_dnode(fn); jffs2_complete_reservation(c); mutex_unlock(&f->sem); - goto out_page; + goto out_err; } jffs2_complete_reservation(c); inode->i_size = pageofs; mutex_unlock(&f->sem); } + /* + * While getting a page and reading data in, lock c->alloc_sem until + * the page is Uptodate. Otherwise GC task may attempt to read the same + * page in read_cache_page(), which causes a deadlock. 
+ */ + mutex_lock(&c->alloc_sem); + pg = grab_cache_page_write_begin(mapping, index, flags); + if (!pg) { + ret = -ENOMEM; + goto release_sem; + } + *pagep = pg; + /* * Read in the page if it wasn't already present. Cannot optimize away * the whole page write case until jffs2_write_end can handle the @@ -220,15 +228,17 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping, mutex_lock(&f->sem); ret = jffs2_do_readpage_nolock(inode, pg); mutex_unlock(&f->sem); - if (ret) - goto out_page; + if (ret) { + unlock_page(pg); + put_page(pg); + goto release_sem; + } } jffs2_dbg(1, "end write_begin(). pg->flags %lx\n", pg->flags); - return ret; -out_page: - unlock_page(pg); - put_page(pg); +release_sem: + mutex_unlock(&c->alloc_sem); +out_err: return ret; } -- Gitee From 547341181064479f3c3700fdc318bcdd31b192f5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Mar 2022 09:58:02 +0800 Subject: [PATCH 053/340] ACPICA: Utilities: Avoid deleting the same object twice in a row stable inclusion from linux-4.19.226 commit e198165efa9032550f05441c78adee56e16850b4 -------------------------------- [ Upstream commit 1cdfe9e346b4c5509ffe19ccde880fd259d9f7a3 ] ACPICA commit c11af67d8f7e3d381068ce7771322f2b5324d687 If original_count is 0 in acpi_ut_update_ref_count (), acpi_ut_delete_internal_obj () is invoked for the target object, which is incorrect, because that object has been deleted once already and the memory allocated to store it may have been reclaimed and allocated for a different purpose by the host OS. Moreover, a confusing debug message following the "Reference Count is already zero, cannot decrement" warning is printed in that case. To fix this issue, make acpi_ut_update_ref_count () return after finding that original_count is 0 and printing the above warning. Link: https://github.com/acpica/acpica/commit/c11af67d Link: https://github.com/acpica/acpica/pull/652 Reported-by: Mark Asselstine Signed-off-by: Rafael J. Wysocki Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/acpi/acpica/utdelete.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/acpi/acpica/utdelete.c b/drivers/acpi/acpica/utdelete.c index 8cc4392c61f3..42e489cfa4d5 100644 --- a/drivers/acpi/acpica/utdelete.c +++ b/drivers/acpi/acpica/utdelete.c @@ -410,6 +410,7 @@ acpi_ut_update_ref_count(union acpi_operand_object *object, u32 action) ACPI_WARNING((AE_INFO, "Obj %p, Reference Count is already zero, cannot decrement\n", object)); + return; } ACPI_DEBUG_PRINT_RAW((ACPI_DB_ALLOCATIONS, -- Gitee From 3998ce7e7ebcac786a24a9358f9edd48b4084e1a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 14 Mar 2022 09:58:03 +0800 Subject: [PATCH 054/340] ACPICA: Executer: Fix the REFCLASS_REFOF case in acpi_ex_opcode_1A_0T_1R() stable inclusion from linux-4.19.226 commit addda1b65b5dfbbdff0f1e100aed0a72b8af5bb8 -------------------------------- [ Upstream commit 24ea5f90ec9548044a6209685c5010edd66ffe8f ] ACPICA commit d984f12041392fa4156b52e2f7e5c5e7bc38ad9e If Operand[0] is a reference of the ACPI_REFCLASS_REFOF class, acpi_ex_opcode_1A_0T_1R () calls acpi_ns_get_attached_object () to obtain return_desc which may require additional resolution with the help of acpi_ex_read_data_from_field (). 
If the latter fails, the reference counter of the original return_desc is decremented which is incorrect, because acpi_ns_get_attached_object () does not increment the reference counter of the object returned by it. This issue may lead to premature deletion of the attached object while it is still attached and a use-after-free and crash in the host OS. For example, this may happen when on evaluation of ref_of() a local region field where there is no registered handler for the given Operation Region. Fix it by making acpi_ex_opcode_1A_0T_1R () return Status right away after a acpi_ex_read_data_from_field () failure. Link: https://github.com/acpica/acpica/commit/d984f120 Link: https://github.com/acpica/acpica/pull/685 Reported-by: Lenny Szubowicz Signed-off-by: Rafael J. Wysocki Signed-off-by: Bob Moore Signed-off-by: Rafael J. Wysocki Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/acpi/acpica/exoparg1.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/exoparg1.c b/drivers/acpi/acpica/exoparg1.c index ba9fbae0cf91..319f4bc6a839 100644 --- a/drivers/acpi/acpica/exoparg1.c +++ b/drivers/acpi/acpica/exoparg1.c @@ -1007,7 +1007,8 @@ acpi_status acpi_ex_opcode_1A_0T_1R(struct acpi_walk_state *walk_state) (walk_state, return_desc, &temp_desc); if (ACPI_FAILURE(status)) { - goto cleanup; + return_ACPI_STATUS + (status); } return_desc = temp_desc; -- Gitee From bd623005181cfaec2ab7e2212835aca9e4926af9 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 14 Mar 2022 09:58:04 +0800 Subject: [PATCH 055/340] dm btree: add a defensive bounds check to insert_at() stable inclusion from linux-4.19.226 commit 716e490c87e4421d5908f7818fc6b61536a17744 -------------------------------- [ Upstream commit 85bca3c05b6cca31625437eedf2060e846c4bbad ] Corrupt metadata could trigger an out of bounds write. Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/md/persistent-data/dm-btree.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 8aae0624a297..6383afb88f31 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -83,14 +83,16 @@ void inc_children(struct dm_transaction_manager *tm, struct btree_node *n, } static int insert_at(size_t value_size, struct btree_node *node, unsigned index, - uint64_t key, void *value) - __dm_written_to_disk(value) + uint64_t key, void *value) + __dm_written_to_disk(value) { uint32_t nr_entries = le32_to_cpu(node->header.nr_entries); + uint32_t max_entries = le32_to_cpu(node->header.max_entries); __le64 key_le = cpu_to_le64(key); if (index > nr_entries || - index >= le32_to_cpu(node->header.max_entries)) { + index >= max_entries || + nr_entries >= max_entries) { DMERR("too many entries in btree node for insert"); __dm_unbless_for_disk(value); return -ENOMEM; -- Gitee From 37050d1b4cdd1fa154f9badd3de40964eb4075c5 Mon Sep 17 00:00:00 2001 From: Joe Thornber Date: Mon, 14 Mar 2022 09:58:05 +0800 Subject: [PATCH 056/340] dm space map common: add bounds check to sm_ll_lookup_bitmap() stable inclusion from linux-4.19.226 commit 7d585b602a5c054e810b8f47183069872ac2fdf6 -------------------------------- [ Upstream commit cba23ac158db7f3cd48a923d6861bee2eb7a2978 ] Corrupted metadata could warrant returning error from sm_ll_lookup_bitmap(). 
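The guard itself (as added in the hunk below) is a plain range check on the block number before it is used to index the bitmap:

    if (b >= ll->nr_blocks) {
        DMERR_LIMIT("metadata block out of bounds");
        return -EINVAL;
    }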
Signed-off-by: Joe Thornber Signed-off-by: Mike Snitzer Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/md/persistent-data/dm-space-map-common.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index a284762e548e..5115a2719603 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -279,6 +279,11 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result) struct disk_index_entry ie_disk; struct dm_block *blk; + if (b >= ll->nr_blocks) { + DMERR_LIMIT("metadata block out of bounds"); + return -EINVAL; + } + b = do_div(index, ll->entries_per_block); r = ll->load_ie(ll, index, &ie_disk); if (r < 0) -- Gitee From 51894623c7fc5480d207f302757fc7e12e9a4700 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Mar 2022 09:58:06 +0800 Subject: [PATCH 057/340] scsi: sr: Don't use GFP_DMA stable inclusion from linux-4.19.226 commit b24ef0a4974a95c9ee1aade3f39b90fc30ac552d -------------------------------- [ Upstream commit d94d94969a4ba07a43d62429c60372320519c391 ] The allocated buffers are used as a command payload, for which the block layer and/or DMA API do the proper bounce buffering if needed. Link: https://lore.kernel.org/r/20211222090842.920724-1-hch@lst.de Reported-by: Baoquan He Reviewed-by: Baoquan He Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/scsi/sr.c | 2 +- drivers/scsi/sr_vendor.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/sr.c b/drivers/scsi/sr.c index be2daf5536ff..180087d1c6cd 100644 --- a/drivers/scsi/sr.c +++ b/drivers/scsi/sr.c @@ -885,7 +885,7 @@ static void get_capabilities(struct scsi_cd *cd) /* allocate transfer buffer */ - buffer = kmalloc(512, GFP_KERNEL | GFP_DMA); + buffer = kmalloc(512, GFP_KERNEL); if (!buffer) { sr_printk(KERN_ERR, cd, "out of memory.\n"); return; diff --git a/drivers/scsi/sr_vendor.c b/drivers/scsi/sr_vendor.c index e3b0ce25162b..2887be4316be 100644 --- a/drivers/scsi/sr_vendor.c +++ b/drivers/scsi/sr_vendor.c @@ -119,7 +119,7 @@ int sr_set_blocklength(Scsi_CD *cd, int blocklength) density = (blocklength > 2048) ? 0x81 : 0x83; #endif - buffer = kmalloc(512, GFP_KERNEL | GFP_DMA); + buffer = kmalloc(512, GFP_KERNEL); if (!buffer) return -ENOMEM; @@ -167,7 +167,7 @@ int sr_cd_check(struct cdrom_device_info *cdi) if (cd->cdi.mask & CDC_MULTI_SESSION) return 0; - buffer = kmalloc(512, GFP_KERNEL | GFP_DMA); + buffer = kmalloc(512, GFP_KERNEL); if (!buffer) return -ENOMEM; -- Gitee From 8247bb0155d50b8ee404e9fa58f13523cac3b03d Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Mon, 14 Mar 2022 09:58:07 +0800 Subject: [PATCH 058/340] serial: Fix incorrect rs485 polarity on uart open stable inclusion from linux-4.19.226 commit 510aa43c9928ca38f1210b6c255373727d7ea312 -------------------------------- commit d3b3404df318504ec084213ab1065b73f49b0f1d upstream. Commit a6845e1e1b78 ("serial: core: Consider rs485 settings to drive RTS") sought to deassert RTS when opening an rs485-enabled uart port. That way, the transceiver does not occupy the bus until it transmits data. Unfortunately, the commit mixed up the logic and *asserted* RTS instead of *deasserting* it: The commit amended uart_port_dtr_rts(), which raises DTR and RTS when opening an rs232 port. 
"Raising" actually means lowering the signal that's coming out of the uart, because an rs232 transceiver not only changes a signal's voltage level, it also *inverts* the signal. See the simplified schematic in the MAX232 datasheet for an example: https://www.ti.com/lit/ds/symlink/max232.pdf So, to raise RTS on an rs232 port, TIOCM_RTS is *set* in port->mctrl and that results in the signal being driven low. In contrast to rs232, the signal level for rs485 Transmit Enable is the identity, not the inversion: If the transceiver expects a "high" RTS signal for Transmit Enable, the signal coming out of the uart must also be high, so TIOCM_RTS must be *cleared* in port->mctrl. The commit did the exact opposite, but it's easy to see why given the confusing semantics of rs232 and rs485. Fix it. Fixes: a6845e1e1b78 ("serial: core: Consider rs485 settings to drive RTS") Cc: stable@vger.kernel.org # v4.14+ Cc: Rafael Gago Castano Cc: Jan Kiszka Cc: Su Bao Cheng Signed-off-by: Lukas Wunner Link: https://lore.kernel.org/r/9395767847833f2f3193c49cde38501eeb3b5669.1639821059.git.lukas@wunner.de Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/tty/serial/serial_core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 4e60045487a8..8d8d63c3ca7d 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -159,7 +159,7 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise) int RTS_after_send = !!(uport->rs485.flags & SER_RS485_RTS_AFTER_SEND); if (raise) { - if (rs485_on && !RTS_after_send) { + if (rs485_on && RTS_after_send) { uart_set_mctrl(uport, TIOCM_DTR); uart_clear_mctrl(uport, TIOCM_RTS); } else { @@ -168,7 +168,7 @@ static void uart_port_dtr_rts(struct uart_port *uport, int raise) } else { unsigned int clear = TIOCM_DTR; - clear |= (!rs485_on || !RTS_after_send) ? TIOCM_RTS : 0; + clear |= (!rs485_on || RTS_after_send) ? TIOCM_RTS : 0; uart_clear_mctrl(uport, clear); } } -- Gitee From 5b303f3d612c0f2d0a462aa3236b33f7e41dbb7a Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 14 Mar 2022 09:58:08 +0800 Subject: [PATCH 059/340] cputime, cpuacct: Include guest time in user time in cpuacct.stat stable inclusion from linux-4.19.226 commit 952514c8565cf72a966993b473fae1708c3684f3 -------------------------------- commit 9731698ecb9c851f353ce2496292ff9fcea39dff upstream. cpuacct.stat in no-root cgroups shows user time without guest time included int it. This doesn't match with user time shown in root cpuacct.stat and /proc//stat. This also affects cgroup2's cpu.stat in the same way. Make account_guest_time() to add user time to cgroup's cpustat to fix this. Fixes: ef12fefabf94 ("cpuacct: add per-cgroup utime/stime statistics") Signed-off-by: Andrey Ryabinin Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Jordan Acked-by: Tejun Heo Cc: Link: https://lore.kernel.org/r/20211115164607.23784-1-arbn@yandex-team.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- kernel/sched/cputime.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 8dd27c1fbb29..0df2448eb5d1 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -152,10 +152,10 @@ void account_guest_time(struct task_struct *p, u64 cputime) /* Add guest time to cpustat. 
*/ if (task_nice(p) > 0) { - cpustat[CPUTIME_NICE] += cputime; + task_group_account_field(p, CPUTIME_NICE, cputime); cpustat[CPUTIME_GUEST_NICE] += cputime; } else { - cpustat[CPUTIME_USER] += cputime; + task_group_account_field(p, CPUTIME_USER, cputime); cpustat[CPUTIME_GUEST] += cputime; } } -- Gitee From 825982c0c0d10628a2a635369c2608ff598f6218 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 14 Mar 2022 09:58:09 +0800 Subject: [PATCH 060/340] ext4: make sure to reset inode lockdep class when quota enabling fails stable inclusion from linux-4.19.226 commit ef41f72716c469a670b9d556b65e5ed83a3a5fd7 -------------------------------- commit 4013d47a5307fdb5c13370b5392498b00fedd274 upstream. When we succeed in enabling some quota type but fail to enable another one with quota feature, we correctly disable all enabled quota types. However we forget to reset i_data_sem lockdep class. When the inode gets freed and reused, it will inherit this lockdep class (i_data_sem is initialized only when a slab is created) and thus eventually lockdep barfs about possible deadlocks. Reported-and-tested-by: syzbot+3b6f9218b1301ddda3e2@syzkaller.appspotmail.com Signed-off-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20211007155336.12493-3-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/ext4/super.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8f9d60ec607e..3100f4aa2d59 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6166,8 +6166,19 @@ static int ext4_enable_quotas(struct super_block *sb) "Failed to enable quota tracking " "(type=%d, err=%d). Please run " "e2fsck to fix.", type, err); - for (type--; type >= 0; type--) + for (type--; type >= 0; type--) { + struct inode *inode; + + inode = sb_dqopt(sb)->files[type]; + if (inode) + inode = igrab(inode); dquot_quota_off(sb, type); + if (inode) { + lockdep_set_quota_inode(inode, + I_DATA_SEM_NORMAL); + iput(inode); + } + } return err; } -- Gitee From 546ab4a5d01c18746055fc42d2ee82c9f56cd45b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 14 Mar 2022 09:58:10 +0800 Subject: [PATCH 061/340] ext4: make sure quota gets properly shutdown on error stable inclusion from linux-4.19.226 commit 841bba6544e10cab41535b76bbdd37555a6ab9df -------------------------------- commit 15fc69bbbbbc8c72e5f6cc4e1be0f51283c5448e upstream. When we hit an error when enabling quotas and setting inode flags, we do not properly shutdown quota subsystem despite returning error from Q_QUOTAON quotactl. This can lead to some odd situations like kernel using quota file while it is still writeable for userspace. Make sure we properly cleanup the quota subsystem in case of error. 
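Schematically, the error path in ext4_quota_on() now looks like this (a condensed sketch of the hunk below, not the full function):

    err = dquot_quota_on(sb, type, format_id, path);
    if (!err) {
        /* ... mark the quota inode NOATIME/IMMUTABLE under a handle ... */
        if (err)
            dquot_quota_off(sb, type);
    }
    if (err)
        lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_NORMAL);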
Signed-off-by: Jan Kara Cc: stable@kernel.org Link: https://lore.kernel.org/r/20211007155336.12493-2-jack@suse.cz Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/ext4/super.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 3100f4aa2d59..f961f7d94a70 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -6080,10 +6080,7 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, lockdep_set_quota_inode(path->dentry->d_inode, I_DATA_SEM_QUOTA); err = dquot_quota_on(sb, type, format_id, path); - if (err) { - lockdep_set_quota_inode(path->dentry->d_inode, - I_DATA_SEM_NORMAL); - } else { + if (!err) { struct inode *inode = d_inode(path->dentry); handle_t *handle; @@ -6103,7 +6100,12 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, ext4_journal_stop(handle); unlock_inode: inode_unlock(inode); + if (err) + dquot_quota_off(sb, type); } + if (err) + lockdep_set_quota_inode(path->dentry->d_inode, + I_DATA_SEM_NORMAL); return err; } -- Gitee From 7e65b84907dfa5c5b994213abf93ae1fe2b201fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lu=C3=ADs=20Henriques?= Date: Mon, 14 Mar 2022 09:58:11 +0800 Subject: [PATCH 062/340] ext4: set csum seed in tmp inode while migrating to extents MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from linux-4.19.226 commit 9103cafdc4531285b6ededd0a3437effd71ff255 -------------------------------- commit e81c9302a6c3c008f5c30beb73b38adb0170ff2d upstream. When migrating to extents, the temporary inode will have it's own checksum seed. This means that, when swapping the inodes data, the inode checksums will be incorrect. This can be fixed by recalculating the extents checksums again. Or simply by copying the seed into the temporary inode. Link: https://bugzilla.kernel.org/show_bug.cgi?id=213357 Reported-by: Jeroen van Wolffelaar Signed-off-by: Luís Henriques Link: https://lore.kernel.org/r/20211214175058.19511-1-lhenriques@suse.de Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/ext4/migrate.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 7eb036cd1d93..0b3194b59d12 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -457,6 +457,17 @@ int ext4_ext_migrate(struct inode *inode) ext4_journal_stop(handle); goto out_unlock; } + /* + * Use the correct seed for checksum (i.e. the seed from 'inode'). This + * is so that the metadata blocks will have the correct checksum after + * the migration. + * + * Note however that, if a crash occurs during the migration process, + * the recovery process is broken because the tmp_inode checksums will + * be wrong and the orphans cleanup will fail. 
+ */ + ei = EXT4_I(inode); + EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed; i_size_write(tmp_inode, i_size_read(inode)); /* * Set the i_nlink to zero so it will be deleted later @@ -500,7 +511,6 @@ int ext4_ext_migrate(struct inode *inode) goto out_tmp_inode; } - ei = EXT4_I(inode); i_data = ei->i_data; memset(&lb, 0, sizeof(lb)); -- Gitee From 277de38db93dde878c60df1c7071f76f8917f663 Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Mon, 14 Mar 2022 09:58:12 +0800 Subject: [PATCH 063/340] ext4: don't use the orphan list when migrating an inode stable inclusion from linux-4.19.226 commit 33446496d21753b5ceb55be4d6e593b487a61239 -------------------------------- commit 6eeaf88fd586f05aaf1d48cb3a139d2a5c6eb055 upstream. We probably want to remove the indirect block to extents migration feature after a deprecation window, but until then, let's fix a potential data loss problem caused by the fact that we put the tmp_inode on the orphan list. In the unlikely case where we crash and do a journal recovery, the data blocks belonging to the inode being migrated are also represented in the tmp_inode on the orphan list --- and so its data blocks will get marked unallocated, and available for reuse. Instead, stop putting the tmp_inode on the oprhan list. So in the case where we crash while migrating the inode, we'll leak an inode, which is not a disaster. It will be easily fixed the next time we run fsck, and it's better than potentially having blocks getting claimed by two different files, and losing data as a result. Signed-off-by: Theodore Ts'o Reviewed-by: Lukas Czerner Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/ext4/migrate.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index 0b3194b59d12..75a769634b2b 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c @@ -435,12 +435,12 @@ int ext4_ext_migrate(struct inode *inode) percpu_down_write(&sbi->s_writepages_rwsem); /* - * Worst case we can touch the allocation bitmaps, a bgd - * block, and a block to link in the orphan list. We do need - * need to worry about credits for modifying the quota inode. + * Worst case we can touch the allocation bitmaps and a block + * group descriptor block. We do need need to worry about + * credits for modifying the quota inode. */ handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, - 4 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); + 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { retval = PTR_ERR(handle); @@ -461,10 +461,6 @@ int ext4_ext_migrate(struct inode *inode) * Use the correct seed for checksum (i.e. the seed from 'inode'). This * is so that the metadata blocks will have the correct checksum after * the migration. - * - * Note however that, if a crash occurs during the migration process, - * the recovery process is broken because the tmp_inode checksums will - * be wrong and the orphans cleanup will fail. 
*/ ei = EXT4_I(inode); EXT4_I(tmp_inode)->i_csum_seed = ei->i_csum_seed; @@ -476,7 +472,6 @@ int ext4_ext_migrate(struct inode *inode) clear_nlink(tmp_inode); ext4_ext_tree_init(handle, tmp_inode); - ext4_orphan_add(handle, tmp_inode); ext4_journal_stop(handle); /* @@ -501,12 +496,6 @@ int ext4_ext_migrate(struct inode *inode) handle = ext4_journal_start(inode, EXT4_HT_MIGRATE, 1); if (IS_ERR(handle)) { - /* - * It is impossible to update on-disk structures without - * a handle, so just rollback in-core changes and live other - * work to orphan_list_cleanup() - */ - ext4_orphan_del(NULL, tmp_inode); retval = PTR_ERR(handle); goto out_tmp_inode; } -- Gitee From d116bcc0a0ba75c2356381010feda7d54e506a34 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 14 Mar 2022 09:58:13 +0800 Subject: [PATCH 064/340] crypto: stm32/crc32 - Fix kernel BUG triggered in probe() stable inclusion from linux-4.19.226 commit e670c4b7c1ca134463211b27af69695a3adcb846 -------------------------------- commit 29009604ad4e3ef784fd9b9fef6f23610ddf633d upstream. The include/linux/crypto.h struct crypto_alg field cra_driver_name description states "Unique name of the transformation provider. " ... " this contains the name of the chip or provider and the name of the transformation algorithm." In case of the stm32-crc driver, field cra_driver_name is identical for all registered transformation providers and set to the name of the driver itself, which is incorrect. This patch fixes it by assigning a unique cra_driver_name to each registered transformation provider. The kernel crash is triggered when the driver calls crypto_register_shashes() which calls crypto_register_shash(), which calls crypto_register_alg(), which calls __crypto_register_alg(), which returns -EEXIST, which is propagated back through this call chain. Upon -EEXIST from crypto_register_shash(), the crypto_register_shashes() starts unregistering the providers back, and calls crypto_unregister_shash(), which calls crypto_unregister_alg(), and this is where the BUG() triggers due to incorrect cra_refcnt. 
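Concretely (see the hunk below), the two registered algorithms stop sharing DRIVER_NAME and each get a unique driver name, so registration no longer hits -EEXIST:

    .cra_name        = "crc32",
    .cra_driver_name = "stm32-crc32-crc32",
    ...
    .cra_name        = "crc32c",
    .cra_driver_name = "stm32-crc32-crc32c",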
Fixes: b51dbe90912a ("crypto: stm32 - Support for STM32 CRC32 crypto module") Signed-off-by: Marek Vasut Cc: # 4.12+ Cc: Alexandre Torgue Cc: Fabien Dessenne Cc: Herbert Xu Cc: Lionel Debieve Cc: Nicolas Toromanoff Cc: linux-arm-kernel@lists.infradead.org Cc: linux-stm32@st-md-mailman.stormreply.com To: linux-crypto@vger.kernel.org Acked-by: Nicolas Toromanoff Signed-off-by: Herbert Xu Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/crypto/stm32/stm32_crc32.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/stm32/stm32_crc32.c b/drivers/crypto/stm32/stm32_crc32.c index 29d2095d9dfd..48c4a71d1cb3 100644 --- a/drivers/crypto/stm32/stm32_crc32.c +++ b/drivers/crypto/stm32/stm32_crc32.c @@ -217,7 +217,7 @@ static struct shash_alg algs[] = { .digestsize = CHKSUM_DIGEST_SIZE, .base = { .cra_name = "crc32", - .cra_driver_name = DRIVER_NAME, + .cra_driver_name = "stm32-crc32-crc32", .cra_priority = 200, .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, @@ -239,7 +239,7 @@ static struct shash_alg algs[] = { .digestsize = CHKSUM_DIGEST_SIZE, .base = { .cra_name = "crc32c", - .cra_driver_name = DRIVER_NAME, + .cra_driver_name = "stm32-crc32-crc32c", .cra_priority = 200, .cra_flags = CRYPTO_ALG_OPTIONAL_KEY, .cra_blocksize = CHKSUM_BLOCK_SIZE, -- Gitee From c26e893496ad4c8e7f04f37c2b672434ceef7368 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Mar 2022 09:58:14 +0800 Subject: [PATCH 065/340] af_unix: annote lockless accesses to unix_tot_inflight & gc_in_progress stable inclusion from linux-4.19.226 commit cf3c4b5912cb208194507a21f6209e8ae4e6c260 -------------------------------- commit 9d6d7f1cb67cdee15f1a0e85aacfb924e0e02435 upstream. wait_for_unix_gc() reads unix_tot_inflight & gc_in_progress without synchronization. Adds READ_ONCE()/WRITE_ONCE() and their associated comments to better document the intent. 
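The annotation pattern is minimal (see the hunks below): the lockless test in wait_for_unix_gc() becomes a READ_ONCE(), paired with WRITE_ONCE() at every update site:

    if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC &&
        !READ_ONCE(gc_in_progress))
        unix_gc();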
BUG: KCSAN: data-race in unix_inflight / wait_for_unix_gc write to 0xffffffff86e2b7c0 of 4 bytes by task 9380 on cpu 0: unix_inflight+0x1e8/0x260 net/unix/scm.c:63 unix_attach_fds+0x10c/0x1e0 net/unix/scm.c:121 unix_scm_to_skb net/unix/af_unix.c:1674 [inline] unix_dgram_sendmsg+0x679/0x16b0 net/unix/af_unix.c:1817 unix_seqpacket_sendmsg+0xcc/0x110 net/unix/af_unix.c:2258 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] __sys_sendmmsg+0x267/0x4c0 net/socket.c:2549 __do_sys_sendmmsg net/socket.c:2578 [inline] __se_sys_sendmmsg net/socket.c:2575 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2575 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffffffff86e2b7c0 of 4 bytes by task 9375 on cpu 1: wait_for_unix_gc+0x24/0x160 net/unix/garbage.c:196 unix_dgram_sendmsg+0x8e/0x16b0 net/unix/af_unix.c:1772 unix_seqpacket_sendmsg+0xcc/0x110 net/unix/af_unix.c:2258 sock_sendmsg_nosec net/socket.c:704 [inline] sock_sendmsg net/socket.c:724 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2409 ___sys_sendmsg net/socket.c:2463 [inline] __sys_sendmmsg+0x267/0x4c0 net/socket.c:2549 __do_sys_sendmmsg net/socket.c:2578 [inline] __se_sys_sendmmsg net/socket.c:2575 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2575 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00000002 -> 0x00000004 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 9375 Comm: syz-executor.1 Not tainted 5.16.0-rc7-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 9915672d4127 ("af_unix: limit unix_tot_inflight") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20220114164328.2038499-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/unix/garbage.c | 14 +++++++++++--- net/unix/scm.c | 6 ++++-- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/net/unix/garbage.c b/net/unix/garbage.c index 8bbe1b8e4ff7..4d283e26d816 100644 --- a/net/unix/garbage.c +++ b/net/unix/garbage.c @@ -197,8 +197,11 @@ void wait_for_unix_gc(void) { /* If number of inflight sockets is insane, * force a garbage collect right now. + * Paired with the WRITE_ONCE() in unix_inflight(), + * unix_notinflight() and gc_in_progress(). */ - if (unix_tot_inflight > UNIX_INFLIGHT_TRIGGER_GC && !gc_in_progress) + if (READ_ONCE(unix_tot_inflight) > UNIX_INFLIGHT_TRIGGER_GC && + !READ_ONCE(gc_in_progress)) unix_gc(); wait_event(unix_gc_wait, gc_in_progress == false); } @@ -218,7 +221,9 @@ void unix_gc(void) if (gc_in_progress) goto out; - gc_in_progress = true; + /* Paired with READ_ONCE() in wait_for_unix_gc(). */ + WRITE_ONCE(gc_in_progress, true); + /* First, select candidates for garbage collection. Only * in-flight sockets are considered, and from those only ones * which don't have any external reference. @@ -304,7 +309,10 @@ void unix_gc(void) /* All candidates should have been detached by now. */ BUG_ON(!list_empty(&gc_candidates)); - gc_in_progress = false; + + /* Paired with READ_ONCE() in wait_for_unix_gc(). 
*/ + WRITE_ONCE(gc_in_progress, false); + wake_up(&unix_gc_wait); out: diff --git a/net/unix/scm.c b/net/unix/scm.c index 8c40f2b32392..ce700b22ecce 100644 --- a/net/unix/scm.c +++ b/net/unix/scm.c @@ -59,7 +59,8 @@ void unix_inflight(struct user_struct *user, struct file *fp) } else { BUG_ON(list_empty(&u->link)); } - unix_tot_inflight++; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1); } user->unix_inflight++; spin_unlock(&unix_gc_lock); @@ -79,7 +80,8 @@ void unix_notinflight(struct user_struct *user, struct file *fp) if (atomic_long_dec_and_test(&u->inflight)) list_del_init(&u->link); - unix_tot_inflight--; + /* Paired with READ_ONCE() in wait_for_unix_gc() */ + WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1); } user->unix_inflight--; spin_unlock(&unix_gc_lock); -- Gitee From dcc767345649d22d7433dc1c1704e26d86caa885 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 14 Mar 2022 09:58:15 +0800 Subject: [PATCH 066/340] netns: add schedule point in ops_exit_list() stable inclusion from linux-4.19.226 commit 4048cedfd16a995b2ef4294b9539da17e2fea750 -------------------------------- commit 2836615aa22de55b8fca5e32fe1b27a67cda625e upstream. When under stress, cleanup_net() can have to dismantle netns in big numbers. ops_exit_list() currently calls many helpers [1] that have no schedule point, and we can end up with soft lockups, particularly on hosts with many cpus. Even for moderate amount of netns processed by cleanup_net() this patch avoids latency spikes. [1] Some of these helpers like fib_sync_up() and fib_sync_down_dev() are very slow because net/ipv4/fib_semantics.c uses host-wide hash tables, and ifindex is used as the only input of two hash functions. ifindexes tend to be the same for all netns (lo.ifindex==1 per instance) This will be fixed in a separate patch. Fixes: 72ad937abd0a ("net: Add support for batching network namespace cleanups") Signed-off-by: Eric Dumazet Cc: Eric W. Biederman Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/core/net_namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index c60123dff803..f4aacbb76fa5 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -149,8 +149,10 @@ static void ops_exit_list(const struct pernet_operations *ops, { struct net *net; if (ops->exit) { - list_for_each_entry(net, net_exit_list, exit_list) + list_for_each_entry(net, net_exit_list, exit_list) { ops->exit(net); + cond_resched(); + } } if (ops->exit_batch) ops->exit_batch(net_exit_list); -- Gitee From 969a3664489d338d03b49cc383dc3df7981aa0e9 Mon Sep 17 00:00:00 2001 From: "Doyle, Patrick" Date: Mon, 14 Mar 2022 09:58:16 +0800 Subject: [PATCH 067/340] mtd: nand: bbt: Fix corner case in bad block table handling stable inclusion from linux-4.19.226 commit 1550a97e4a5d8bf29071bd6c17355ca173e90f73 -------------------------------- commit fd0d8d85f7230052e638a56d1bfea170c488e6bc upstream. In the unlikely event that both blocks 10 and 11 are marked as bad (on a 32 bit machine), then the process of marking block 10 as bad stomps on cached entry for block 11. There are (of course) other examples. 
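To make the 10/11 example concrete (assuming the driver's three status bits per block, which is what makes entry 10 straddle a word boundary on a 32-bit machine): entry 10 starts at bit offset 30, so rbits = 3 + 30 - 32 = 1 bit spills into the next long. The old 'val >> rbits' shifted the 3-bit value by only 1 and ORed two bits into that long, clobbering bit 1, which belongs to entry 11; 'val >> (bits_per_block - rbits)' shifts by 2 and deposits only the single spilled bit.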
Signed-off-by: Patrick Doyle Reviewed-by: Richard Weinberger Signed-off-by: Yoshio Furuyama [: Fixed the title] Signed-off-by: Miquel Raynal Cc: Frieder Schrempf Link: https://lore.kernel.org/linux-mtd/774a92693f311e7de01e5935e720a179fb1b2468.1616635406.git.ytc-mb-yfuruyama7@kioxia.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/mtd/nand/bbt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/bbt.c b/drivers/mtd/nand/bbt.c index 044adf913854..64af6898131d 100644 --- a/drivers/mtd/nand/bbt.c +++ b/drivers/mtd/nand/bbt.c @@ -123,7 +123,7 @@ int nanddev_bbt_set_block_status(struct nand_device *nand, unsigned int entry, unsigned int rbits = bits_per_block + offs - BITS_PER_LONG; pos[1] &= ~GENMASK(rbits - 1, 0); - pos[1] |= val >> rbits; + pos[1] |= val >> (bits_per_block - rbits); } return 0; -- Gitee From 3dfc9fb2089afddb537aab262cf3bb69e7fb8a33 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Mon, 14 Mar 2022 09:58:17 +0800 Subject: [PATCH 068/340] select: Fix indefinitely sleeping task in poll_schedule_timeout() stable inclusion from linux-4.19.227 commit 6717900f775a6129a7b4d03ba4922218d8bf1caa -------------------------------- commit 68514dacf2715d11b91ca50d88de047c086fea9c upstream. A task can end up indefinitely sleeping in do_select() -> poll_schedule_timeout() when the following race happens:

  TASK1 (thread1)                TASK2                      TASK1 (thread2)
  do_select()
    setup poll_wqueues table
    with 'fd'
                                 write data to 'fd'
                                 pollwake()
                                   table->triggered = 1
                                                            closes 'fd' thread1 is
                                                            waiting for
    poll_schedule_timeout()
      - sees table->triggered
      table->triggered = 0
      return -EINTR
      loop back in do_select()

But at this point when TASK1 loops back, the fdget() in the setup of poll_wqueues fails. So now we never find 'fd' is ready for reading and sleep in poll_schedule_timeout() indefinitely. Treat an fd that got closed as an fd on which some event happened. This makes sure we cannot block indefinitely in do_select(). Another option would be to return -EBADF in this case but that has a potential of subtly breaking applications that exercise this behavior and it happens to work for them. So returning fd as active seems like a safer choice.
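In code terms (condensed from the hunks below), EPOLLNVAL is added to the poll event sets and becomes the default mask when fdget() fails, so a closed fd is reported as active instead of being waited on forever:

    #define POLLIN_SET  (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR | EPOLLNVAL)
    #define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR | EPOLLNVAL)
    #define POLLEX_SET  (EPOLLPRI | EPOLLNVAL)
    ...
    mask = EPOLLNVAL;       /* used when fdget(i) fails, i.e. the fd was closed */
    f = fdget(i);
    if (f.file) {
        wait_key_set(wait, in, out, bit, busy_flag);
        mask = vfs_poll(f.file, wait);
        fdput(f);
    }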
Suggested-by: Linus Torvalds CC: stable@vger.kernel.org Signed-off-by: Jan Kara Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/select.c | 63 ++++++++++++++++++++++++++++------------------------- 1 file changed, 33 insertions(+), 30 deletions(-) diff --git a/fs/select.c b/fs/select.c index b684f0dd6db8..67e4be14c00c 100644 --- a/fs/select.c +++ b/fs/select.c @@ -458,9 +458,11 @@ static int max_select_fd(unsigned long n, fd_set_bits *fds) return max; } -#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR) -#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR) -#define POLLEX_SET (EPOLLPRI) +#define POLLIN_SET (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN | EPOLLHUP | EPOLLERR |\ + EPOLLNVAL) +#define POLLOUT_SET (EPOLLWRBAND | EPOLLWRNORM | EPOLLOUT | EPOLLERR |\ + EPOLLNVAL) +#define POLLEX_SET (EPOLLPRI | EPOLLNVAL) static inline void wait_key_set(poll_table *wait, unsigned long in, unsigned long out, unsigned long bit, @@ -527,6 +529,7 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) break; if (!(bit & all_bits)) continue; + mask = EPOLLNVAL; f = fdget(i); if (f.file) { wait_key_set(wait, in, out, bit, @@ -534,34 +537,34 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time) mask = vfs_poll(f.file, wait); fdput(f); - if ((mask & POLLIN_SET) && (in & bit)) { - res_in |= bit; - retval++; - wait->_qproc = NULL; - } - if ((mask & POLLOUT_SET) && (out & bit)) { - res_out |= bit; - retval++; - wait->_qproc = NULL; - } - if ((mask & POLLEX_SET) && (ex & bit)) { - res_ex |= bit; - retval++; - wait->_qproc = NULL; - } - /* got something, stop busy polling */ - if (retval) { - can_busy_loop = false; - busy_flag = 0; - - /* - * only remember a returned - * POLL_BUSY_LOOP if we asked for it - */ - } else if (busy_flag & mask) - can_busy_loop = true; - } + if ((mask & POLLIN_SET) && (in & bit)) { + res_in |= bit; + retval++; + wait->_qproc = NULL; + } + if ((mask & POLLOUT_SET) && (out & bit)) { + res_out |= bit; + retval++; + wait->_qproc = NULL; + } + if ((mask & POLLEX_SET) && (ex & bit)) { + res_ex |= bit; + retval++; + wait->_qproc = NULL; + } + /* got something, stop busy polling */ + if (retval) { + can_busy_loop = false; + busy_flag = 0; + + /* + * only remember a returned + * POLL_BUSY_LOOP if we asked for it + */ + } else if (busy_flag & mask) + can_busy_loop = true; + } if (res_in) *rinp = res_in; -- Gitee From faba439fce06cc5bf270889214dd947a9fd3f65b Mon Sep 17 00:00:00 2001 From: Jordy Zomer Date: Mon, 14 Mar 2022 10:50:19 +0800 Subject: [PATCH 069/340] nfc: st21nfca: Fix potential buffer overflows in EVT_TRANSACTION mainline inclusion from mainline-v5.17-rc1 commit 4fbcc1a4cb20fe26ad0225679c536c80f1648221 category: bugfix bugzilla: 186393 CVE: CVE-2022-26490 ------------------------------------------------- It appears that there are some buffer overflows in EVT_TRANSACTION. This happens because the length parameters that are passed to memcpy come directly from skb->data and are not guarded in any way. Signed-off-by: Jordy Zomer Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. 
Miller Signed-off-by: Huang Guobin Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Laibin Qiu --- drivers/nfc/st21nfca/se.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/nfc/st21nfca/se.c b/drivers/nfc/st21nfca/se.c index fd967a38a94a..ced3c20d6453 100644 --- a/drivers/nfc/st21nfca/se.c +++ b/drivers/nfc/st21nfca/se.c @@ -332,6 +332,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, return -ENOMEM; transaction->aid_len = skb->data[1]; + + /* Checking if the length of the AID is valid */ + if (transaction->aid_len > sizeof(transaction->aid)) + return -EINVAL; + memcpy(transaction->aid, &skb->data[2], transaction->aid_len); @@ -341,6 +346,11 @@ int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, return -EPROTO; transaction->params_len = skb->data[transaction->aid_len + 3]; + + /* Total size is allocated (skb->len - 2) minus fixed array members */ + if (transaction->params_len > ((skb->len - 2) - sizeof(struct nfc_evt_transaction))) + return -EINVAL; + memcpy(transaction->params, skb->data + transaction->aid_len + 4, transaction->params_len); -- Gitee From 612e8299ecc7897799df1be7703f7f7f6d6f2a9c Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Mon, 14 Mar 2022 14:57:32 +0800 Subject: [PATCH 070/340] mm: fix unable to use reliable memory in page cache hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA ---------------------------------------------- It is inaccurate when accumulate percpu variable without lock, it will result in pagecache_reliable_pages to be negative in sometime, and will prevent pagecache using reliable memory. For more accurate statistic, replace percpu variable by percpu_counter. The additional percpu_counter will be access in alloc_pages, the init of these two percpu_conter is too late in late_initcall, allocations that use alloc_pages should have to check if the two counter has been inited, that will introduce latency, in order to sovle this, init the two percpu_counter in advance. 
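
A user-space analogue of the batched-counter pattern that percpu_counter provides (editorial sketch; the names and batch size below are made up, not the kernel API): each writer accumulates a small private delta and folds it into the shared total only once the delta exceeds a batch, so reading the total stays cheap and its error stays bounded, unlike summing raw per-CPU variables without a lock.

#include <stdatomic.h>
#include <stdio.h>

#define BATCH 32

static atomic_long total;			/* shared, approximate value */
static _Thread_local long local_delta;		/* private to each thread    */

static void counter_add(long amount)
{
	local_delta += amount;
	if (local_delta >= BATCH || local_delta <= -BATCH) {
		atomic_fetch_add(&total, local_delta);
		local_delta = 0;
	}
}

static long counter_read_positive(void)
{
	long v = atomic_load(&total);

	return v < 0 ? 0 : v;	/* same idea as percpu_counter_read_positive() */
}

int main(void)
{
	for (int i = 0; i < 100; i++)
		counter_add(1);
	printf("approximate total: %ld (exact would be 100)\n",
	       counter_read_positive());
	return 0;
}
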
Signed-off-by: Chen Wandun Reviewed-by: Kefeng Wang Signed-off-by: Laibin Qiu --- include/linux/mem_reliable.h | 17 +++++----- mm/mem_reliable.c | 62 ++++++++++++++++++------------------ mm/page_alloc.c | 5 +++ 3 files changed, 45 insertions(+), 39 deletions(-) diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index ba5d41edbc44..c4f954340ea7 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -21,8 +21,9 @@ extern bool shmem_reliable; extern struct percpu_counter reliable_shmem_used_nr_page; extern bool pagecache_use_reliable_mem; DECLARE_PER_CPU(long, nr_reliable_buddy_pages); -DECLARE_PER_CPU(long, pagecache_reliable_pages); -DECLARE_PER_CPU(long, anon_reliable_pages); + +extern struct percpu_counter pagecache_reliable_pages; +extern struct percpu_counter anon_reliable_pages; extern unsigned long nr_reliable_reserve_pages __read_mostly; extern long shmem_reliable_nr_page __read_mostly; @@ -43,6 +44,7 @@ extern void reliable_lru_add(enum lru_list lru, struct page *page, extern void page_cache_prepare_alloc(gfp_t *gfp); extern void reliable_lru_add_batch(int zid, enum lru_list lru, int val); +extern bool mem_reliable_counter_initialized(void); static inline bool mem_reliable_is_enabled(void) { @@ -81,13 +83,10 @@ static inline void reliable_page_counter(struct page *page, static inline bool reliable_mem_limit_check(unsigned long nr_page) { - int cpu; - long num = 0; + s64 num; - for_each_possible_cpu(cpu) { - num += per_cpu(pagecache_reliable_pages, cpu); - num += per_cpu(anon_reliable_pages, cpu); - } + num = percpu_counter_read_positive(&pagecache_reliable_pages); + num += percpu_counter_read_positive(&anon_reliable_pages); return num + nr_page <= task_reliable_limit / PAGE_SIZE; } @@ -184,6 +183,8 @@ static inline void reliable_lru_add(enum lru_list lru, static inline void page_cache_prepare_alloc(gfp_t *gfp) {} static inline void reliable_lru_add_batch(int zid, enum lru_list lru, int val) {} + +static inline bool mem_reliable_counter_initialized(void) { return false; } #endif #endif diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 62df78657ff9..033af716610f 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -34,12 +34,18 @@ unsigned long nr_reliable_reserve_pages = MEM_RELIABLE_RESERVE_MIN / PAGE_SIZE; long shmem_reliable_nr_page = LONG_MAX; bool pagecache_use_reliable_mem __read_mostly = true; -DEFINE_PER_CPU(long, pagecache_reliable_pages); -DEFINE_PER_CPU(long, anon_reliable_pages); +struct percpu_counter pagecache_reliable_pages; +struct percpu_counter anon_reliable_pages; static unsigned long zero; static unsigned long reliable_pagecache_max_bytes = ULONG_MAX; +bool mem_reliable_counter_initialized(void) +{ + return likely(percpu_counter_initialized(&pagecache_reliable_pages)) && + likely((percpu_counter_initialized(&anon_reliable_pages))); +} + bool mem_reliable_status(void) { return mem_reliable_is_enabled(); @@ -66,9 +72,9 @@ void reliable_lru_add_batch(int zid, enum lru_list lru, int val) if (zid < ZONE_MOVABLE && zid >= 0) { if (is_file_lru(lru)) - this_cpu_add(pagecache_reliable_pages, val); + percpu_counter_add(&pagecache_reliable_pages, val); else if (is_anon_lru(lru)) - this_cpu_add(anon_reliable_pages, val); + percpu_counter_add(&anon_reliable_pages, val); } } @@ -78,14 +84,14 @@ void reliable_lru_add(enum lru_list lru, struct page *page, int val) return; if (is_file_lru(lru)) - this_cpu_add(pagecache_reliable_pages, val); + percpu_counter_add(&pagecache_reliable_pages, val); else if (is_anon_lru(lru)) - 
this_cpu_add(anon_reliable_pages, val); + percpu_counter_add(&anon_reliable_pages, val); else if (lru == LRU_UNEVICTABLE) { if (PageAnon(page)) - this_cpu_add(anon_reliable_pages, val); + percpu_counter_add(&anon_reliable_pages, val); else - this_cpu_add(pagecache_reliable_pages, val); + percpu_counter_add(&pagecache_reliable_pages, val); } } @@ -188,21 +194,20 @@ static void show_val_kb(struct seq_file *m, const char *s, unsigned long num) void reliable_report_meminfo(struct seq_file *m) { bool pagecache_enabled = pagecache_reliable_is_enabled(); - long nr_pagecache_pages = 0; - long nr_anon_pages = 0; + s64 nr_pagecache_pages = 0; + s64 nr_anon_pages = 0; long nr_buddy_pages = 0; int cpu; if (!mem_reliable_is_enabled()) return; - for_each_possible_cpu(cpu) { + for_each_possible_cpu(cpu) nr_buddy_pages += per_cpu(nr_reliable_buddy_pages, cpu); - nr_anon_pages += per_cpu(anon_reliable_pages, cpu); - if (pagecache_enabled) - nr_pagecache_pages += - per_cpu(pagecache_reliable_pages, cpu); - } + + nr_anon_pages = percpu_counter_sum_positive(&anon_reliable_pages); + if (pagecache_enabled) + nr_pagecache_pages = percpu_counter_sum_positive(&pagecache_reliable_pages); show_val_kb(m, "ReliableTotal: ", total_reliable_mem_sz() >> PAGE_SHIFT); @@ -445,8 +450,7 @@ static struct ctl_table reliable_dir_table[] = { void page_cache_prepare_alloc(gfp_t *gfp) { - long nr_reliable = 0; - int cpu; + s64 nr_reliable = 0; if (!mem_reliable_is_enabled()) return; @@ -454,11 +458,7 @@ void page_cache_prepare_alloc(gfp_t *gfp) if (!pagecache_reliable_is_enabled()) goto no_reliable; - for_each_possible_cpu(cpu) - nr_reliable += this_cpu_read(pagecache_reliable_pages); - - if (nr_reliable < 0) - goto no_reliable; + nr_reliable = percpu_counter_read_positive(&pagecache_reliable_pages); if (nr_reliable > reliable_pagecache_max_bytes >> PAGE_SHIFT) goto no_reliable; @@ -480,9 +480,12 @@ static int __init reliable_sysctl_init(void) return -1; } + percpu_counter_init(&pagecache_reliable_pages, 0, GFP_KERNEL); + percpu_counter_init(&anon_reliable_pages, 0, GFP_KERNEL); + return 0; } -late_initcall(reliable_sysctl_init); +arch_initcall(reliable_sysctl_init); #else static void mem_reliable_ctrl_bit_disabled(int idx) {} #endif @@ -515,21 +518,18 @@ static void mem_reliable_feature_disable(int idx) void reliable_show_mem_info(void) { - int cpu; - long num = 0; + s64 num = 0; if (!mem_reliable_is_enabled()) return; - for_each_possible_cpu(cpu) { - num += per_cpu(anon_reliable_pages, cpu); - num += per_cpu(pagecache_reliable_pages, cpu); - } + num += percpu_counter_sum_positive(&anon_reliable_pages); + num += percpu_counter_sum_positive(&pagecache_reliable_pages); pr_info("ReliableTotal: %lu kB", total_reliable_mem_sz() >> 10); pr_info("ReliableUsed: %lu kB", used_reliable_mem_sz() >> 10); pr_info("task_reliable_limit: %lu kB", task_reliable_limit >> 10); - pr_info("reliable_user_used: %ld kB", num << (PAGE_SHIFT - 10)); + pr_info("reliable_user_used: %lld kB", num << (PAGE_SHIFT - 10)); } void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fd4354d9ebad..dab62d1d3a6e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4674,6 +4674,11 @@ static inline bool prepare_before_alloc(gfp_t *gfp_mask, unsigned int order) * allocation trigger task_reliable_limit */ if (is_global_init(current)) { + if (!mem_reliable_counter_initialized()) { + *gfp_mask |= ___GFP_RELIABILITY; + return true; + } + if (reliable_mem_limit_check(1 << order) && mem_reliable_watermark_ok(1 
<< order)) *gfp_mask |= ___GFP_RELIABILITY; -- Gitee From 216ce413fb52be3da2dd38e854c185bc61dfcfd6 Mon Sep 17 00:00:00 2001 From: Yu Liao Date: Mon, 14 Mar 2022 14:57:33 +0800 Subject: [PATCH 071/340] mm: introduce "clear_freelist" kernel parameter hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4XB9H CVE: NA -------------------------------- CONFIG_CLEAR_FREELIST_PAGE specifies the default value for clear freelist. Add a kernel parameter to make it possible to override the default. Keep clear_freelist disabled unless "clear_freelist" is explicitly specified. Signed-off-by: Yu Liao Reviewed-by: Kefeng Wang Signed-off-by: Laibin Qiu --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ mm/clear_freelist_page.c | 11 ++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index a46b2fe191ba..2cb6c844d751 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -516,6 +516,10 @@ cio_ignore= [S390] See Documentation/s390/CommonIO for details. + + clear_freelist + Enable clear_freelist feature. + clk_ignore_unused [CLK] Prevents the clock framework from automatically gating diff --git a/mm/clear_freelist_page.c b/mm/clear_freelist_page.c index 69975f458dc7..f0a13b8fdcdb 100644 --- a/mm/clear_freelist_page.c +++ b/mm/clear_freelist_page.c @@ -154,9 +154,18 @@ static struct ctl_table sys_ctl_table[] = { { } }; +static bool clear_freelist_enabled; +static int __init setup_clear_freelist(char *str) +{ + clear_freelist_enabled = true; + return 1; +} +__setup("clear_freelist", setup_clear_freelist); + static int __init clear_freelist_init(void) { - register_sysctl_table(sys_ctl_table); + if (clear_freelist_enabled) + register_sysctl_table(sys_ctl_table); return 0; } -- Gitee From 6fb0d251f7687d8e9519db87b07275d04e5b35e9 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Mon, 14 Mar 2022 21:56:59 +0800 Subject: [PATCH 072/340] mm: disable memory reliable when kdump is in progress hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- Kdump only have limited memory and will lead to bugly memory reliable features if memory reliable if enabled. So disable memory reliable if kdump is in progress. 
Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Laibin Qiu --- mm/mem_reliable.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 033af716610f..5505577d3784 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -9,6 +9,7 @@ #include #include #include +#include #define MEM_RELIABLE_RESERVE_MIN (256UL << 20) @@ -128,6 +129,11 @@ void mem_reliable_init(bool has_unmirrored_mem, unsigned long *zone_movable_pfn) if (!reliable_enabled) return; + if (is_kdump_kernel()) { + pr_err("init failed, the kdump is in progress\n"); + return; + } + if (atomic_long_read(&total_reliable_mem) == 0) { memset(zone_movable_pfn, 0, sizeof(unsigned long) * MAX_NUMNODES); -- Gitee From 9b6c51cd780588813c5d327e2a6d677d010a2b6f Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Mon, 14 Mar 2022 21:57:00 +0800 Subject: [PATCH 073/340] mm: fix zoneref mapping problem in memory reliable hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- The mapping between zoneref and zone will be updated if __GFP_THISNODE is defined and memory reliable fallback is enabled. This will put ZONE_MOVABLE in a wrong zonerefs slot and lead to the origin zone unselectable. With this patch, high_zoneidx is updated via gfp_zone() which ___GFP_RELIABILITY is removed from the origin gfp_mask. Perferred zoneref is recalculated after. Fixes: 3023a4b35d41 ("mm: Introduce fallback mechanism for memory reliable") Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Laibin Qiu --- mm/page_alloc.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index dab62d1d3a6e..f4d4716b9049 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3672,21 +3672,21 @@ __alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order, } #ifdef CONFIG_MEMORY_RELIABLE -static inline struct zone *reliable_fb_find_zone(gfp_t gfp_mask, - struct alloc_context *ac) +static inline void reliable_fb_find_zone(gfp_t gfp_mask, + struct alloc_context *ac) { if (!reliable_allow_fb_enabled()) - return NULL; + return; - /* dst nodemask may don't have zone we want, fallback here */ + /* dst node don't have zone we want, fallback here */ if ((gfp_mask & __GFP_THISNODE) && (ac->high_zoneidx == ZONE_NORMAL) && (gfp_mask & ___GFP_RELIABILITY)) { - struct zoneref *ref = first_zones_zonelist( - ac->zonelist, ZONE_MOVABLE, ac->nodemask); - return ref->zone; + ac->high_zoneidx = gfp_zone(gfp_mask & ~___GFP_RELIABILITY); + ac->preferred_zoneref = first_zones_zonelist( + ac->zonelist, ac->high_zoneidx, ac->nodemask); } - return NULL; + return; } static inline struct page * @@ -3712,10 +3712,10 @@ reliable_fb_before_oom(gfp_t gfp_mask, int order, return NULL; } #else -static inline struct zone *reliable_fb_find_zone(gfp_t gfp_mask, - struct alloc_context *ac) +static inline void reliable_fb_find_zone(gfp_t gfp_mask, + struct alloc_context *ac) { - return NULL; + return; } static inline struct page *reliable_fb_before_oom(gfp_t gfp_mask, int order, @@ -4375,8 +4375,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, ac->high_zoneidx, ac->nodemask); if (!ac->preferred_zoneref->zone) { - ac->preferred_zoneref->zone = - reliable_fb_find_zone(gfp_mask, ac); + reliable_fb_find_zone(gfp_mask, ac); if (!ac->preferred_zoneref->zone) goto nopage; -- Gitee From fde6ae6107f086d2bbfcf52a6abae99ae41b953a Mon 
Sep 17 00:00:00 2001 From: Ma Wupeng Date: Mon, 14 Mar 2022 21:57:01 +0800 Subject: [PATCH 074/340] mm: Count reliable shmem used based on NR_SHMEM hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA ------------------------------------------ With this patch, reliable memory counter will be updated when NR_SHMEM is updated. Pervious shmem reliable memory counter is not accurate if swap is enabled. NR_SHMEM update in memcg secenario is ignored because this has nothing to do with the global counter. If shmem pages is migrated or collapsed from one region to another region, reliable memory counter need to be updated because these pages's reliable status may not be the same. Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Laibin Qiu --- mm/filemap.c | 5 ++++- mm/khugepaged.c | 2 ++ mm/migrate.c | 5 +++++ mm/shmem.c | 7 ++----- 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index a89d70097e68..9b6e72e14a04 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -291,6 +291,7 @@ static void unaccount_page_cache_page(struct address_space *mapping, __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr); if (PageSwapBacked(page)) { __mod_node_page_state(page_pgdat(page), NR_SHMEM, -nr); + shmem_reliable_page_counter(page, -nr); if (PageTransHuge(page)) __dec_node_page_state(page, NR_SHMEM_THPS); } else { @@ -895,8 +896,10 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask) */ if (!PageHuge(new)) __inc_node_page_state(new, NR_FILE_PAGES); - if (PageSwapBacked(new)) + if (PageSwapBacked(new)) { __inc_node_page_state(new, NR_SHMEM); + shmem_reliable_page_counter(new, 1); + } xa_unlock_irqrestore(&mapping->i_pages, flags); mem_cgroup_migrate(old, new); radix_tree_preload_end(); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 301d6aa079d7..2975fc124cb6 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1559,6 +1559,7 @@ static void collapse_shmem(struct mm_struct *mm, ClearPageActive(page); ClearPageUnevictable(page); unlock_page(page); + shmem_reliable_page_counter(page, -1); put_page(page); index++; } @@ -1573,6 +1574,7 @@ static void collapse_shmem(struct mm_struct *mm, mem_cgroup_commit_charge(new_page, memcg, false, true); count_memcg_events(memcg, THP_COLLAPSE_ALLOC, 1); lru_cache_add_anon(new_page); + shmem_reliable_page_counter(new_page, 1 << HPAGE_PMD_ORDER); /* * Remove pte page tables, so we can re-fault the page as huge. diff --git a/mm/migrate.c b/mm/migrate.c index f7721d0aece5..ecfa8829acfd 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -548,6 +548,11 @@ int migrate_page_move_mapping(struct address_space *mapping, xa_unlock(&mapping->i_pages); /* Leave irq disabled to prevent preemption while updating stats */ + if (PageSwapBacked(page) && !PageSwapCache(page)) { + shmem_reliable_page_counter(page, -nr); + shmem_reliable_page_counter(newpage, nr); + } + /* * If moved to a different zone then also account * the page for that zone. 
Other VM counters will be diff --git a/mm/shmem.c b/mm/shmem.c index 4363dbc8d57e..8915a5b9ad0a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -733,6 +733,7 @@ static int shmem_add_to_page_cache(struct page *page, __inc_node_page_state(page, NR_SHMEM_THPS); __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr); __mod_node_page_state(page_pgdat(page), NR_SHMEM, nr); + shmem_reliable_page_counter(page, nr); xa_unlock_irq(&mapping->i_pages); } else { page->mapping = NULL; @@ -758,6 +759,7 @@ static void shmem_delete_from_page_cache(struct page *page, void *radswap) mapping->nrpages--; __dec_node_page_state(page, NR_FILE_PAGES); __dec_node_page_state(page, NR_SHMEM); + shmem_reliable_page_counter(page, -1); xa_unlock_irq(&mapping->i_pages); put_page(page); BUG_ON(error); @@ -962,8 +964,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, truncate_inode_page(mapping, page); } } - shmem_reliable_page_counter( - page, -(1 << compound_order(page))); unlock_page(page); } pagevec_remove_exceptionals(&pvec); @@ -1074,8 +1074,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend, break; } } - shmem_reliable_page_counter( - page, -(1 << compound_order(page))); unlock_page(page); } pagevec_remove_exceptionals(&pvec); @@ -1981,7 +1979,6 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index, inode->i_blocks += BLOCKS_PER_PAGE << compound_order(page); shmem_recalc_inode(inode); spin_unlock_irq(&info->lock); - shmem_reliable_page_counter(page, 1 << compound_order(page)); alloced = true; if (PageTransHuge(page) && -- Gitee From 46e70fd20c9b8c6b15ff9cb7750b818f44fa52c8 Mon Sep 17 00:00:00 2001 From: Max Kellermann Date: Mon, 14 Mar 2022 22:28:57 +0800 Subject: [PATCH 075/340] lib/iov_iter: initialize "flags" in new pipe_buffer mainline inclusion from mainline-v5.17-rc6 commit 9d2231c5d74e13b2a0546fee6737ee4446017903 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4X1GI?from=project-issue CVE: NA -------------------------------- The functions copy_page_to_iter_pipe() and push_pipe() can both allocate a new pipe_buffer, but the "flags" member initializer is missing. 
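
A user-space sketch of why the missing initializer matters (editorial illustration; the struct below only mimics pipe_buffer): an object carved out of reused memory keeps whatever bits were there before in every field that is not explicitly assigned, so a later reader of the unset "flags" field observes stale values.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct fake_pipe_buffer {
	void *page;
	unsigned int offset;
	unsigned int len;
	unsigned int flags;	/* the field the patch starts initializing */
};

int main(void)
{
	struct fake_pipe_buffer *buf = malloc(sizeof(*buf));

	if (!buf)
		return 1;
	memset(buf, 0xAA, sizeof(*buf));	/* stand-in for reused memory */

	buf->page = NULL;
	buf->offset = 0;
	buf->len = 42;
	/* buf->flags was never assigned and still holds garbage here */
	printf("flags before fix: %#x\n", buf->flags);

	buf->flags = 0;				/* the one-line fix */
	printf("flags after fix:  %#x\n", buf->flags);

	free(buf);
	return 0;
}
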
Fixes: 241699cd72a8 ("new iov_iter flavour: pipe-backed") To: Alexander Viro To: linux-fsdevel@vger.kernel.org To: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Signed-off-by: Max Kellermann Signed-off-by: Al Viro Signed-off-by: Zhihao Cheng Reviewed-by: Zhang Yi Signed-off-by: Laibin Qiu --- lib/iov_iter.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 25668139fc1f..b80320956caf 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -506,6 +506,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by return 0; pipe->nrbufs++; buf->ops = &page_cache_pipe_buf_ops; + buf->flags = 0; get_page(buf->page = page); buf->offset = offset; buf->len = bytes; @@ -630,6 +631,7 @@ static size_t push_pipe(struct iov_iter *i, size_t size, break; pipe->nrbufs++; pipe->bufs[idx].ops = &default_pipe_buf_ops; + pipe->bufs[idx].flags = 0; pipe->bufs[idx].page = page; pipe->bufs[idx].offset = 0; if (left <= PAGE_SIZE) { -- Gitee From 939559f58fc75629a99eb4003ea3f3ada52dea2f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 15 Mar 2022 10:04:17 +0800 Subject: [PATCH 076/340] crypto: pcrypt - Fix user-after-free on module unload stable inclusion from linux-4.19.102 commit 47ef5cb878817127bd3d54c3578bbbd3f7c2bf2c CVE: NA ------------------------------- [ Upstream commit 07bfd9bdf568a38d9440c607b72342036011f727 ] On module unload of pcrypt we must unregister the crypto algorithms first and then tear down the padata structure. As otherwise the crypto algorithms are still alive and can be used while the padata structure is being freed. Fixes: 5068c7a883d1 ("crypto: pcrypt - Add pcrypt crypto...") Cc: Signed-off-by: Herbert Xu Signed-off-by: Sasha Levin Signed-off-by: Lu Jialin Reviewed-by: Xiu Jianfeng Reviewed-by: Wang Weiyang Signed-off-by: Laibin Qiu --- crypto/pcrypt.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index 00c7230acfed..4bf4bb5b1a8b 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -509,11 +509,12 @@ static int __init pcrypt_init(void) static void __exit pcrypt_exit(void) { + crypto_unregister_template(&pcrypt_tmpl); + pcrypt_fini_padata(&pencrypt); pcrypt_fini_padata(&pdecrypt); kset_unregister(pcrypt_kset); - crypto_unregister_template(&pcrypt_tmpl); } module_init(pcrypt_init); -- Gitee From 2632c58b0aa3a8b5b0d921961f7360f4510a1d7d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 17 Mar 2022 20:40:44 +0800 Subject: [PATCH 077/340] veth: Do not record rx queue hint in veth_xmit stable inclusion from linux-4.19.226 commit bd6e97e2b6f59a19894c7032a83f03ad38ede28e -------------------------------- commit 710ad98c363a66a0cd8526465426c5c5f8377ee0 upstream. Laurent reported that they have seen a significant amount of TCP retransmissions at high throughput from applications residing in network namespaces talking to the outside world via veths. The drops were seen on the qdisc layer (fq_codel, as per systemd default) of the phys device such as ena or virtio_net due to all traffic hitting a _single_ TX queue _despite_ multi-queue device. (Note that the setup was _not_ using XDP on veths as the issue is generic.) More specifically, after edbea9220251 ("veth: Store queue_mapping independently of XDP prog presence") which made it all the way back to v4.19.184+, skb_record_rx_queue() would set skb->queue_mapping to 1 (given 1 RX and 1 TX queue by default for veths) instead of leaving at 0. 
This is eventually retained and callbacks like ena_select_queue() will also pick single queue via netdev_core_pick_tx()'s ndo_select_queue() once all the traffic is forwarded to that device via upper stack or other means. Similarly, for others not implementing ndo_select_queue() if XPS is disabled, netdev_pick_tx() might call into the skb_tx_hash() and check for prior skb_rx_queue_recorded() as well. In general, it is a _bad_ idea for virtual devices like veth to mess around with queue selection [by default]. Given dev->real_num_tx_queues is by default 1, the skb->queue_mapping was left untouched, and so prior to edbea9220251 the netdev_core_pick_tx() could do its job upon __dev_queue_xmit() on the phys device. Unbreak this and restore prior behavior by removing the skb_record_rx_queue() from veth_xmit() altogether. If the veth peer has an XDP program attached, then it would return the first RX queue index in xdp_md->rx_queue_index (unless configured in non-default manner). However, this is still better than breaking the generic case. Fixes: edbea9220251 ("veth: Store queue_mapping independently of XDP prog presence") Fixes: 638264dc9022 ("veth: Support per queue XDP ring") Reported-by: Laurent Bernaille Signed-off-by: Daniel Borkmann Cc: Maciej Fijalkowski Cc: Toshiaki Makita Cc: Eric Dumazet Cc: Paolo Abeni Cc: John Fastabend Cc: Willem de Bruijn Acked-by: John Fastabend Reviewed-by: Eric Dumazet Acked-by: Toshiaki Makita Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Conflicts: drivers/net/veth.c Signed-off-by: Ziyang Xuan Reviewed-by: Wei Yongjun Signed-off-by: Laibin Qiu --- drivers/net/veth.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 41a00cd76955..749faa6fcd82 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -197,8 +197,6 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev) if (rxq < rcv->real_num_rx_queues) { rq = &rcv_priv->rq[rxq]; rcv_xdp = rcu_access_pointer(rq->xdp_prog); - if (rcv_xdp) - skb_record_rx_queue(skb, rxq); } if (likely(veth_forward_skb(rcv, skb, rq, rcv_xdp) == NET_RX_SUCCESS)) { -- Gitee From a9140c0834eacdc16f3f15f0fa8199c2769fdf1e Mon Sep 17 00:00:00 2001 From: Cui GaoSheng Date: Thu, 17 Mar 2022 20:40:45 +0800 Subject: [PATCH 078/340] Revert "audit: bugfix for infinite loop when flush the hold queue" hulk inclusion category: bugfix bugzilla: 186384 https://gitee.com/openeuler/kernel/issues/I4X1AI?from=project-issue CVE: NA -------------------------------- This reverts commit 67ab712f2b0e7448052f3b8fdee727fb7e204448. Signed-off-by: Cui GaoSheng Reviewed-by: Xiu Jianfeng Signed-off-by: Laibin Qiu --- kernel/audit.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index 3de5ebb94559..c5e034fe14bb 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -740,8 +740,6 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, if (!sk) { if (err_hook) (*err_hook)(skb); - if (queue == &audit_hold_queue) - goto out; continue; } @@ -758,8 +756,6 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, (*err_hook)(skb); if (rc == -EAGAIN) rc = 0; - if (queue == &audit_hold_queue) - goto out; /* continue to drain the queue */ continue; } else @@ -771,7 +767,6 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, } } -out: return (rc >= 0 ? 
0 : rc); } -- Gitee From 1518b80ba8be3561e002dbc91b6583e66b840c67 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 17 Mar 2022 20:40:46 +0800 Subject: [PATCH 079/340] audit: improve audit queue handling when "audit=1" on cmdline mainline inclusion from mainline-v5.17-rc3 commit f26d04331360d42dbd6b58448bd98e4edbfbe1c5 category: bugfix bugzilla: 186384 https://gitee.com/openeuler/kernel/issues/I4X1AI?from=project-issue CVE: NA -------------------------------- When an admin enables audit at early boot via the "audit=1" kernel command line the audit queue behavior is slightly different; the audit subsystem goes to greater lengths to avoid dropping records, which unfortunately can result in problems when the audit daemon is forcibly stopped for an extended period of time. This patch makes a number of changes designed to improve the audit queuing behavior so that leaving the audit daemon in a stopped state for an extended period does not cause a significant impact to the system. - kauditd_send_queue() is now limited to looping through the passed queue only once per call. This not only prevents the function from looping indefinitely when records are returned to the current queue, it also allows any recovery handling in kauditd_thread() to take place when kauditd_send_queue() returns. - Transient netlink send errors seen as -EAGAIN now cause the record to be returned to the retry queue instead of going to the hold queue. The intention of the hold queue is to store, perhaps for an extended period of time, the events which led up to the audit daemon going offline. The retry queue remains a temporary queue intended to protect against transient issues between the kernel and the audit daemon. - The retry queue is now limited by the audit_backlog_limit setting, the same as the other queues. This allows admins to bound the size of all of the audit queues on the system. - kauditd_rehold_skb() now returns records to the end of the hold queue to ensure ordering is preserved in the face of recent changes to kauditd_send_queue(). Cc: stable@vger.kernel.org Fixes: 5b52330bbfe63 ("audit: fix auditd/kernel connection state tracking") Fixes: f4b3ee3c85551 ("audit: improve robustness of the audit queue handling") Reported-by: Gaosheng Cui Tested-by: Gaosheng Cui Reviewed-by: Richard Guy Briggs Signed-off-by: Paul Moore Signed-off-by: Cui GaoSheng Reviewed-by: Xiu Jianfeng Signed-off-by: Laibin Qiu --- kernel/audit.c | 62 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/kernel/audit.c b/kernel/audit.c index c5e034fe14bb..7dc14a4d9e3c 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -549,20 +549,22 @@ static void kauditd_printk_skb(struct sk_buff *skb) /** * kauditd_rehold_skb - Handle a audit record send failure in the hold queue * @skb: audit record + * @error: error code (unused) * * Description: * This should only be used by the kauditd_thread when it fails to flush the * hold queue. */ -static void kauditd_rehold_skb(struct sk_buff *skb) +static void kauditd_rehold_skb(struct sk_buff *skb, __always_unused int error) { - /* put the record back in the queue at the same place */ - skb_queue_head(&audit_hold_queue, skb); + /* put the record back in the queue */ + skb_queue_tail(&audit_hold_queue, skb); } /** * kauditd_hold_skb - Queue an audit record, waiting for auditd * @skb: audit record + * @error: error code * * Description: * Queue the audit record, waiting for an instance of auditd. 
When this @@ -572,19 +574,31 @@ static void kauditd_rehold_skb(struct sk_buff *skb) * and queue it, if we have room. If we want to hold on to the record, but we * don't have room, record a record lost message. */ -static void kauditd_hold_skb(struct sk_buff *skb) +static void kauditd_hold_skb(struct sk_buff *skb, int error) { /* at this point it is uncertain if we will ever send this to auditd so * try to send the message via printk before we go any further */ kauditd_printk_skb(skb); /* can we just silently drop the message? */ - if (!audit_default) { - kfree_skb(skb); - return; + if (!audit_default) + goto drop; + + /* the hold queue is only for when the daemon goes away completely, + * not -EAGAIN failures; if we are in a -EAGAIN state requeue the + * record on the retry queue unless it's full, in which case drop it + */ + if (error == -EAGAIN) { + if (!audit_backlog_limit || + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_retry_queue, skb); + return; + } + audit_log_lost("kauditd retry queue overflow"); + goto drop; } - /* if we have room, queue the message */ + /* if we have room in the hold queue, queue the message */ if (!audit_backlog_limit || skb_queue_len(&audit_hold_queue) < audit_backlog_limit) { skb_queue_tail(&audit_hold_queue, skb); @@ -593,24 +607,32 @@ static void kauditd_hold_skb(struct sk_buff *skb) /* we have no other options - drop the message */ audit_log_lost("kauditd hold queue overflow"); +drop: kfree_skb(skb); } /** * kauditd_retry_skb - Queue an audit record, attempt to send again to auditd * @skb: audit record + * @error: error code (unused) * * Description: * Not as serious as kauditd_hold_skb() as we still have a connected auditd, * but for some reason we are having problems sending it audit records so * queue the given record and attempt to resend. */ -static void kauditd_retry_skb(struct sk_buff *skb) +static void kauditd_retry_skb(struct sk_buff *skb, __always_unused int error) { - /* NOTE: because records should only live in the retry queue for a - * short period of time, before either being sent or moved to the hold - * queue, we don't currently enforce a limit on this queue */ - skb_queue_tail(&audit_retry_queue, skb); + if (!audit_backlog_limit || + skb_queue_len(&audit_retry_queue) < audit_backlog_limit) { + skb_queue_tail(&audit_retry_queue, skb); + return; + } + + /* we have to drop the record, send it via printk as a last effort */ + kauditd_printk_skb(skb); + audit_log_lost("kauditd retry queue overflow"); + kfree_skb(skb); } /** @@ -648,7 +670,7 @@ static void auditd_reset(const struct auditd_connection *ac) /* flush the retry queue to the hold queue, but don't touch the main * queue since we need to process that normally for multicast */ while ((skb = skb_dequeue(&audit_retry_queue))) - kauditd_hold_skb(skb); + kauditd_hold_skb(skb, -ECONNREFUSED); } /** @@ -722,16 +744,18 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, struct sk_buff_head *queue, unsigned int retry_limit, void (*skb_hook)(struct sk_buff *skb), - void (*err_hook)(struct sk_buff *skb)) + void (*err_hook)(struct sk_buff *skb, int error)) { int rc = 0; - struct sk_buff *skb; + struct sk_buff *skb = NULL; + struct sk_buff *skb_tail; unsigned int failed = 0; /* NOTE: kauditd_thread takes care of all our locking, we just use * the netlink info passed to us (e.g. 
sk and portid) */ - while ((skb = skb_dequeue(queue))) { + skb_tail = skb_peek_tail(queue); + while ((skb != skb_tail) && (skb = skb_dequeue(queue))) { /* call the skb_hook for each skb we touch */ if (skb_hook) (*skb_hook)(skb); @@ -739,7 +763,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, /* can we send to anyone via unicast? */ if (!sk) { if (err_hook) - (*err_hook)(skb); + (*err_hook)(skb, -ECONNREFUSED); continue; } @@ -753,7 +777,7 @@ static int kauditd_send_queue(struct sock *sk, u32 portid, rc == -ECONNREFUSED || rc == -EPERM) { sk = NULL; if (err_hook) - (*err_hook)(skb); + (*err_hook)(skb, rc); if (rc == -EAGAIN) rc = 0; /* continue to drain the queue */ -- Gitee From ab77435872e7cf1efa7ac84914b87e6f5530c187 Mon Sep 17 00:00:00 2001 From: Laibin Qiu Date: Fri, 18 Mar 2022 18:00:43 +0800 Subject: [PATCH 080/340] blk-mq: add exception handling when srcu->sda alloc failed hulk inclusion category: bugfix bugzilla: 186352, https://gitee.com/openeuler/kernel/issues/I4YADX DTS: DTS2022031707143 CVE: NA -------------------------------- In case of BLK_MQ_F_BLOCKING, per-hctx srcu is used to protect dispatch critical area. But the current process is not aware when memory of srcu allocation failed in blk_mq_alloc_hctx, which will leads to illegal address BUG. Add return value validation to avoid this problem. Signed-off-by: Laibin Qiu Reviewed-by: Hou Tao --- block/blk-mq.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 0732bcc65f88..9604d2b39745 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2459,12 +2459,16 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set, if (!hctx->fq) goto free_bitmap; - if (hctx->flags & BLK_MQ_F_BLOCKING) - init_srcu_struct(hctx->srcu); + if (hctx->flags & BLK_MQ_F_BLOCKING) { + if (init_srcu_struct(hctx->srcu) != 0) + goto free_flush_queue; + } blk_mq_hctx_kobj_init(hctx); return hctx; + free_flush_queue: + blk_free_flush_queue(hctx->fq); free_bitmap: sbitmap_free(&hctx->ctx_map); free_ctxs: -- Gitee From c61648a11933cb33878a55ab2610347b7406a6fc Mon Sep 17 00:00:00 2001 From: Mao HongBo Date: Fri, 18 Mar 2022 17:51:31 +0800 Subject: [PATCH 081/340] irqchip/gic-phytium-2500: Fix issue that interrupts are concentrated in one cpu Phytium inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I41AUQ CVE: NA ------------------------------------------------- Fix the issue that interrupts are concentrated in one cpu for Phytium S2500 server. 
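
A toy illustration of the new distribution (editorial sketch; the socket layout below is assumed, not taken from the hardware): adding "cpu % skt_cpu_cnt" to the socket's base CPU spreads successive requests across all CPUs of the target socket instead of letting most of them collapse onto a single CPU.

#include <stdio.h>

int main(void)
{
	int skt_base = 8;	/* first CPU id of the target socket (assumed) */
	int skt_cpu_cnt = 4;	/* CPUs per socket (assumed)                   */

	for (int cpu = 0; cpu < 16; cpu++)
		printf("requested cpu %2d -> target cpu %d\n",
		       cpu, skt_base + cpu % skt_cpu_cnt);
	return 0;
}
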
Signed-off-by: Mao HongBo Signed-off-by: Zheng Zengkai Reviewed-by: Hanjun Guo Signed-off-by: Laibin Qiu --- drivers/irqchip/irq-gic-phytium-2500-its.c | 3 +-- drivers/irqchip/irq-gic-phytium-2500.c | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/irqchip/irq-gic-phytium-2500-its.c b/drivers/irqchip/irq-gic-phytium-2500-its.c index fff1c8546d23..dd24af3793ca 100644 --- a/drivers/irqchip/irq-gic-phytium-2500-its.c +++ b/drivers/irqchip/irq-gic-phytium-2500-its.c @@ -1181,8 +1181,7 @@ static int its_cpumask_select(struct its_device *its_dev, } cpu = cpumask_any_and(mask_val, cpu_mask); - if ((cpu > cpus) && (cpu < (cpus + skt_cpu_cnt[skt_id]))) - cpus = cpu; + cpus = cpus + cpu % skt_cpu_cnt[skt_id]; if (is_kdump_kernel()) { skt = (cpu_logical_map(cpu) >> 16) & 0xff; diff --git a/drivers/irqchip/irq-gic-phytium-2500.c b/drivers/irqchip/irq-gic-phytium-2500.c index 8674463a08c6..103e97f5855e 100644 --- a/drivers/irqchip/irq-gic-phytium-2500.c +++ b/drivers/irqchip/irq-gic-phytium-2500.c @@ -1123,8 +1123,7 @@ static int gic_cpumask_select(struct irq_data *d, const struct cpumask *mask_val } cpu = cpumask_any_and(mask_val, cpu_online_mask); - if ((cpu > cpus) && (cpu < (cpus + skt_cpu_cnt[irq_skt]))) - cpus = cpu; + cpus = cpus + cpu % skt_cpu_cnt[irq_skt]; if (is_kdump_kernel()) { skt = (cpu_logical_map(cpu) >> 16) & 0xff; -- Gitee From b89cbe688b97261a4c924a74417f275aa5b25fb9 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Fri, 18 Mar 2022 09:43:49 +0000 Subject: [PATCH 082/340] livepatch/core: Check klp_func before 'klp_init_object_loaded' hulk inclusion category: feature bugzilla: 186346, https://gitee.com/openeuler/kernel/issues/I4WBFN CVE: NA -------------------------------- Refer to following procedure: klp_init_object klp_init_object_loaded klp_find_object_symbol <-- 1. oops happened when old_name is NULL!!! klp_init_func <-- 2. currently old_name is first time check here This problem was introduced in commit 453d38459f94 ("livepatch/arm64: fix func size less than limit") which exchange order of 'klp_init_func' and 'klp_init_object_loaded' then cause old_name being used before check. We move these checks before 'klp_init_object_loaded' and add several logs to tell why check failed. 
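
A stand-alone sketch of the ordering rule being restored (editorial illustration; the struct and length limit are simplified stand-ins for klp_func and KSYM_NAME_LEN): validate old_name and new_func before anything that consumes old_name runs, so a NULL or over-long name is rejected instead of dereferenced.

#include <stdio.h>
#include <string.h>

#define NAME_LEN_MAX 128	/* stand-in for KSYM_NAME_LEN */

struct fake_func {
	const char *old_name;
	void *new_func;
};

static int check_func(const struct fake_func *f)
{
	if (!f->old_name || !f->new_func) {
		fprintf(stderr, "old_name or new_func is invalid\n");
		return -1;
	}
	if (strlen(f->old_name) >= NAME_LEN_MAX) {
		fprintf(stderr, "old_name is too long\n");
		return -1;
	}
	return 0;
}

static void resolve_old_symbol(const struct fake_func *f)
{
	/* would crash on a NULL old_name, so it must run after check_func() */
	printf("resolving %s (%zu chars)\n", f->old_name, strlen(f->old_name));
}

int main(void)
{
	struct fake_func bad = { .old_name = NULL, .new_func = NULL };

	if (check_func(&bad))		/* checks run first ... */
		return 1;
	resolve_old_symbol(&bad);	/* ... so this is never reached */
	return 0;
}
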
Fixes: 453d38459f94 ("livepatch/arm64: fix func size less than limit") Signed-off-by: Zheng Yejian Reviewed-by: Cheng Jian Signed-off-by: Yongqiang Liu --- kernel/livepatch/core.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index 66a7c4befa0e..89b31e827425 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -1010,12 +1010,6 @@ static int klp_init_func(struct klp_object *obj, struct klp_func *func) int ret; #endif - if (!func->old_name || !func->new_func) - return -EINVAL; - - if (strlen(func->old_name) >= KSYM_NAME_LEN) - return -EINVAL; - #ifdef CONFIG_LIVEPATCH_WO_FTRACE ret = arch_klp_func_can_patch(func); if (ret) @@ -1105,6 +1099,16 @@ static int klp_init_object(struct klp_patch *patch, struct klp_object *obj) if (klp_is_module(obj) && strlen(obj->name) >= MODULE_NAME_LEN) return -EINVAL; + klp_for_each_func(obj, func) { + if (!func->old_name || !func->new_func) { + pr_err("old_name or new_func is invalid\n"); + return -EINVAL; + } + if (strlen(func->old_name) >= KSYM_NAME_LEN) { + pr_err("old_name is too long\n"); + return -EINVAL; + } + } obj->patched = false; obj->mod = NULL; -- Gitee From f91c9577095c3595db6485e6362833dcd8e0e54e Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Mon, 21 Mar 2022 08:00:20 +0000 Subject: [PATCH 083/340] ext4: Fix symlink file size not match to file content hulk inclusion category: bugfix bugzilla: 186450, https://gitee.com/openeuler/kernel/issues/I4YSJ7 CVE: NA ----------------------------------------------- We got issue as follows: [root@yebin home]# fsck.ext4 -fn ram0yb e2fsck 1.45.6 (20-Mar-2020) Pass 1: Checking inodes, blocks, and sizes Pass 2: Checking directory structure Symlink /p3/d14/d1a/l3d (inode #3494) is invalid. Clear? no Entry 'l3d' in /p3/d14/d1a (3383) has an incorrect filetype (was 7, should be 0). Fix? no As symlink file size not match to file content. If symlink data block writback failed, will call ext4_finish_bio to end io. In this path don't mark buffer error. When umount do checkpoint can't detect buffer error, then will cleanup jounral. Actually, correct data maybe in journal area. To solve this issue, mark buffer error when detect bio error in ext4_finish_bio. Signed-off-by: Ye Bin Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/ext4/page-io.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 9cc79b7b0df1..3de933354a08 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -105,8 +105,10 @@ static void ext4_finish_bio(struct bio *bio) continue; } clear_buffer_async_write(bh); - if (bio->bi_status) + if (bio->bi_status) { + set_buffer_write_io_error(bh); buffer_io_error(bh); + } } while ((bh = bh->b_this_page) != head); bit_spin_unlock(BH_Uptodate_Lock, &head->b_state); local_irq_restore(flags); -- Gitee From 06ff79d54e9f2dc74706976176eced61e31a5b54 Mon Sep 17 00:00:00 2001 From: Yu Kuai Date: Mon, 21 Mar 2022 08:00:21 +0000 Subject: [PATCH 084/340] blk-throtl: fix race in io dispatching hulk inclusion category: bugfix bugzilla: 186449, https://gitee.com/openeuler/kernel/issues/I4YSPC CVE: NA -------------------------------- If io is throttled, such io will be issued by blk_throtl_dispatch_work_fn() or blk_throtl_drain(), and the io is fetched by throtl_pop_queued(). throtl_pop_queued() should be protected by 'queue_lock', as what blk_throtl_dispatch_work_fn() does. 
However, it's not protected in blk_throtl_drain(), which may lead to concurrent bio_list_pop(), and may end up crashing the kernel. Fix the problem by protecting throtl_pop_queued() through 'queue_lock' in blk_throtl_drain(). Signed-off-by: Yu Kuai Reviewed-by: Jason Yan Signed-off-by: Yongqiang Liu --- block/blk-throttle.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 1e6d5631669b..95d28cf8ff0f 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -2422,8 +2422,11 @@ void blk_throtl_drain(struct request_queue *q) struct blkcg_gq *blkg; struct cgroup_subsys_state *pos_css; struct bio *bio; + struct bio_list bio_list_on_stack; int rw; + bio_list_init(&bio_list_on_stack); + queue_lockdep_assert_held(q); rcu_read_lock(); @@ -2440,12 +2443,16 @@ void blk_throtl_drain(struct request_queue *q) tg_drain_bios(&td->service_queue); rcu_read_unlock(); - spin_unlock_irq(q->queue_lock); /* all bios now should be in td->service_queue, issue them */ for (rw = READ; rw <= WRITE; rw++) while ((bio = throtl_pop_queued(&td->service_queue.queued[rw], NULL))) + bio_list_add(&bio_list_on_stack, bio); + spin_unlock_irq(q->queue_lock); + + if (!bio_list_empty(&bio_list_on_stack)) + while ((bio = bio_list_pop(&bio_list_on_stack))) generic_make_request(bio); spin_lock_irq(q->queue_lock); -- Gitee From ee8a1d431a68787cefd21ab451ddc8b62b95d43c Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 21 Mar 2022 09:37:30 +0000 Subject: [PATCH 085/340] fuse: fix pipe buffer lifetime for direct_io mainline inclusion from mainline-v5.17-rc8 commit 0c4bcfdecb1ac0967619ee7ff44871d93c08c909 category: bugfix bugzilla: 186448, https://gitee.com/openeuler/kernel/issues/I4YORE CVE: CVE-2022-1011 -------------------------------- In FOPEN_DIRECT_IO mode, fuse_file_write_iter() calls fuse_direct_write_iter(), which normally calls fuse_direct_io(), which then imports the write buffer with fuse_get_user_pages(), which uses iov_iter_get_pages() to grab references to userspace pages instead of actually copying memory. On the filesystem device side, these pages can then either be read to userspace (via fuse_dev_read()), or splice()d over into a pipe using fuse_dev_splice_read() as pipe buffers with &nosteal_pipe_buf_ops. This is wrong because after fuse_dev_do_read() unlocks the FUSE request, the userspace filesystem can mark the request as completed, causing write() to return. At that point, the userspace filesystem should no longer have access to the pipe buffer. Fix by copying pages coming from the user address space to new pipe buffers. Reported-by: Jann Horn Fixes: c3021629a0d8 ("fuse: support splice() reading from fuse device") Cc: Signed-off-by: Miklos Szeredi Conflicts: fs/fuse/file.c fs/fuse/fuse_i.h Signed-off-by: Zhang Wensheng Reviewed-by: Tao Hou Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- fs/fuse/dev.c | 12 +++++++++++- fs/fuse/file.c | 6 ++++-- fs/fuse/fuse_i.h | 3 +++ 3 files changed, 18 insertions(+), 3 deletions(-) diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8a98ffd5e4b6..bd5293e01e48 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -999,7 +999,17 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep, while (count) { if (cs->write && cs->pipebufs && page) { - return fuse_ref_page(cs, page, offset, count); + /* + * Can't control lifetime of pipe buffers, so always + * copy user pages. 
+ */ + if (cs->req->in.user_pages) { + err = fuse_copy_fill(cs); + if (err) + return err; + } else { + return fuse_ref_page(cs, page, offset, count); + } } else if (!cs->len) { if (cs->move_pages && page && offset == 0 && count == PAGE_SIZE) { diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 5ecabacabd47..8d238d965db1 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1321,10 +1321,12 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, (PAGE_SIZE - ret) & (PAGE_SIZE - 1); } - if (write) + if (write) { + req->in.user_pages = 1; req->in.argpages = 1; - else + } else { req->out.argpages = 1; + } *nbytesp = nbytes; diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 853d37ec81e0..27f3b6d4f11c 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -174,6 +174,9 @@ struct fuse_in { /** True if the data for the last argument is in req->pages */ unsigned argpages:1; + /** True if direct write */ + unsigned user_pages:1; + /** Number of arguments */ unsigned numargs; -- Gitee From 108567834dad53630b3684734a60195fe1462bda Mon Sep 17 00:00:00 2001 From: Zhang Wensheng Date: Mon, 21 Mar 2022 09:37:31 +0000 Subject: [PATCH 086/340] kabi: fix kabi broken in struct fuse_in mainline inclusion from mainline-v5.17-rc8 commit 0c4bcfdecb1ac0967619ee7ff44871d93c08c909 category: bugfix bugzilla: 186448, https://gitee.com/openeuler/kernel/issues/I4YORE CVE: CVE-2022-1011 -------------------------------- Because create a new user_pages in fuse_in, to fix kabi change. Signed-off-by: Zhang Wensheng Reviewed-by: Tao Hou Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- fs/fuse/fuse_i.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 27f3b6d4f11c..0981011c10c6 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -174,8 +174,10 @@ struct fuse_in { /** True if the data for the last argument is in req->pages */ unsigned argpages:1; +#ifndef __GENKSYMS__ /** True if direct write */ unsigned user_pages:1; +#endif /** Number of arguments */ unsigned numargs; -- Gitee From 1b70444ce952d714d51fded4ec1e162428fa539f Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Tue, 22 Mar 2022 13:53:07 +0000 Subject: [PATCH 087/340] ACPI/IORT: Do not blindly trust DMA masks from firmware mainline inclusion from mainline-v5.11-rc6 commit a1df829ead5877d4a1061e976a50e2e665a16f24 category: bugfix bugzilla: https://gitee.com/openeuler/qemu/issues/I4WE3Y CVE: NA -------------------------------- Address issue observed on real world system with suboptimal IORT table where DMA masks of PCI devices would get set to 0 as result. iort_dma_setup() would query the root complex'/named component IORT entry for a DMA mask, and use that over the one the device has been configured with earlier. Ideally we want to use the minimum mask of what the IORT contains for the root complex and what the device was configured with. 
Fixes: 5ac65e8c8941 ("ACPI/IORT: Support address size limit for root complexes") Signed-off-by: Moritz Fischer Reviewed-by: Robin Murphy Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20210122012419.95010-1-mdf@kernel.org Signed-off-by: Catalin Marinas Conflicts: drivers/acpi/arm64/iort.c Signed-off-by: Xiongfeng Wang Reviewed-by: Hanjun Guo Signed-off-by: Yongqiang Liu --- drivers/acpi/arm64/iort.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index bd35cfe6776d..320db806994b 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -997,6 +997,11 @@ static int nc_dma_get_range(struct device *dev, u64 *size) ncomp = (struct acpi_iort_named_component *)node->node_data; + if (!ncomp->memory_address_limit) { + pr_warn(FW_BUG "Named component missing memory address limit\n"); + return -EINVAL; + } + *size = ncomp->memory_address_limit >= 64 ? U64_MAX : 1ULL<memory_address_limit; @@ -1016,6 +1021,11 @@ static int rc_dma_get_range(struct device *dev, u64 *size) rc = (struct acpi_iort_root_complex *)node->node_data; + if (!rc->memory_address_limit) { + pr_warn(FW_BUG "Root complex missing memory address limit\n"); + return -EINVAL; + } + *size = rc->memory_address_limit >= 64 ? U64_MAX : 1ULL<memory_address_limit; @@ -1072,8 +1082,8 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *dma_size) * retrieved from firmware. */ dev->bus_dma_mask = mask; - dev->coherent_dma_mask = mask; - *dev->dma_mask = mask; + dev->coherent_dma_mask = min(dev->coherent_dma_mask, mask); + *dev->dma_mask = min(*dev->dma_mask, mask); } *dma_addr = dmaaddr; -- Gitee From afea170045a0cb86eb59d5871ff96635e35c8a22 Mon Sep 17 00:00:00 2001 From: Brian Foster Date: Tue, 22 Mar 2022 13:53:08 +0000 Subject: [PATCH 088/340] xfs: fix use after free in buf log item unlock assert mainline inclusion from mainline-v5.1-rc5 commit 4d09807f20462d6edf04f6e98d3d47bcdf7a5e2f category: bugfix bugzilla: 186464, https://gitee.com/openeuler/kernel/issues/I4YYIZ -------------------------------- The xfs_buf_log_item ->iop_unlock() callback asserts that the buffer is unlocked when either non-stale or aborted. This assert occurs after the bli refcount has been dropped and the log item potentially freed. The aborted check is thus a potential use after free. This problem has been reproduced with KASAN enabled via generic/475. Fix up xfs_buf_item_unlock() to query aborted state before the bli reference is dropped to prevent a potential use after free. Signed-off-by: Brian Foster Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. 
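
A small stand-alone sketch of the clamping logic (editorial illustration; the mask values and helper names are made up, not the IORT code): derive a mask from the firmware-reported address limit, treat a zero limit as invalid, and otherwise combine it with the device's existing mask by taking the minimum rather than overwriting it.

#include <stdint.h>
#include <stdio.h>

static uint64_t mask_from_limit(unsigned int limit_bits)
{
	if (limit_bits == 0)		/* buggy table: no limit reported */
		return 0;		/* caller must treat this as invalid */
	return limit_bits >= 64 ? UINT64_MAX : (1ULL << limit_bits) - 1;
}

static uint64_t min_u64(uint64_t a, uint64_t b)
{
	return a < b ? a : b;
}

int main(void)
{
	uint64_t dev_mask = (1ULL << 32) - 1;		/* device's current mask */
	uint64_t fw_mask  = mask_from_limit(40);	/* healthy IORT entry    */
	uint64_t zero     = mask_from_limit(0);		/* broken IORT entry     */

	printf("combined mask      : %#llx\n",
	       (unsigned long long)min_u64(dev_mask, fw_mask));
	printf("blindly trusting 0 : %#llx (DMA mask would collapse to zero)\n",
	       (unsigned long long)zero);
	return 0;
}
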
Wong Signed-off-by: Guo Xuenan Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/xfs/xfs_buf_item.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 4309cd07331a..9b37a362de35 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -605,6 +605,8 @@ xfs_buf_item_unlock( #if defined(DEBUG) || defined(XFS_WARN) bool ordered = bip->bli_flags & XFS_BLI_ORDERED; bool dirty = bip->bli_flags & XFS_BLI_DIRTY; + bool aborted = test_bit(XFS_LI_ABORTED, + &lip->li_flags); #endif trace_xfs_buf_item_unlock(bip); @@ -633,7 +635,7 @@ xfs_buf_item_unlock( released = xfs_buf_item_put(bip); if (hold || (stale && !released)) return; - ASSERT(!stale || test_bit(XFS_LI_ABORTED, &lip->li_flags)); + ASSERT(!stale || aborted); xfs_buf_relse(bp); } -- Gitee From a4e359845552c638c790807d4888fd5b61ad9c7a Mon Sep 17 00:00:00 2001 From: Jia-Ju Bai Date: Tue, 22 Mar 2022 13:53:09 +0000 Subject: [PATCH 089/340] xfs: Fix possible null-pointer dereferences in xchk_da_btree_block_check_sibling() mainline inclusion from mainline-v5.3-rc2 commit afa1d96d1430c2138c545fb76e6dcb21222098d4 category: bugfix bugzilla: 186464, https://gitee.com/openeuler/kernel/issues/I4YYIZ -------------------------------- In xchk_da_btree_block_check_sibling(), there is an if statement on line 274 to check whether ds->state->altpath.blk[level].bp is NULL: if (ds->state->altpath.blk[level].bp) When ds->state->altpath.blk[level].bp is NULL, it is used on line 281: xfs_trans_brelse(..., ds->state->altpath.blk[level].bp); struct xfs_buf_log_item *bip = bp->b_log_item; ASSERT(bp->b_transp == tp); Thus, possible null-pointer dereferences may occur. To fix these bugs, ds->state->altpath.blk[level].bp is checked before being used. These bugs are found by a static analysis tool STCheck written by us. Signed-off-by: Jia-Ju Bai Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Guo Xuenan Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/xfs/scrub/dabtree.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c index f1260b4bfdee..17b6dde71bfe 100644 --- a/fs/xfs/scrub/dabtree.c +++ b/fs/xfs/scrub/dabtree.c @@ -286,7 +286,11 @@ xchk_da_btree_block_check_sibling( /* Compare upper level pointer to sibling pointer. */ if (ds->state->altpath.blk[level].blkno != sibling) xchk_da_set_corrupt(ds, level); - xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp); + if (ds->state->altpath.blk[level].bp) { + xfs_trans_brelse(ds->dargs.trans, + ds->state->altpath.blk[level].bp); + ds->state->altpath.blk[level].bp = NULL; + } out: return error; } -- Gitee From 76f51e9eaec87bf30d59e3dbf85785396f199944 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Tue, 22 Mar 2022 13:53:10 +0000 Subject: [PATCH 090/340] xfs: fix an undefined behaviour in _da3_path_shift mainline inclusion from mainline-v5.6-rc4 commit 4982bff1ace1196843f55536fcd4cc119738fe39 category: bugfix bugzilla: 186464, https://gitee.com/openeuler/kernel/issues/I4YYIZ -------------------------------- In xfs_da3_path_shift() "blk" can be assigned to state->path.blk[-1] if state->path.active is 1 (which is a valid state) when it tries to add an entry to a single dir leaf block and then to shift forward to see if there's a sibling block that would be a better place to put the new entry. This causes a UBSAN warning given negative array indices are undefined behavior in C. 
In practice the warning is entirely harmless given that "blk" is never dereferenced in this case, but it is still better to fix up the warning and slightly improve the code. UBSAN: Undefined behaviour in fs/xfs/libxfs/xfs_da_btree.c:1989:14 index -1 is out of range for type 'xfs_da_state_blk_t [5]' Call trace: dump_backtrace+0x0/0x2c8 show_stack+0x20/0x2c dump_stack+0xe8/0x150 __ubsan_handle_out_of_bounds+0xe4/0xfc xfs_da3_path_shift+0x860/0x86c [xfs] xfs_da3_node_lookup_int+0x7c8/0x934 [xfs] xfs_dir2_node_addname+0x2c8/0xcd0 [xfs] xfs_dir_createname+0x348/0x38c [xfs] xfs_create+0x6b0/0x8b4 [xfs] xfs_generic_create+0x12c/0x1f8 [xfs] xfs_vn_mknod+0x3c/0x4c [xfs] xfs_vn_create+0x34/0x44 [xfs] do_last+0xd4c/0x10c8 path_openat+0xbc/0x2f4 do_filp_open+0x74/0xf4 do_sys_openat2+0x98/0x180 __arm64_sys_openat+0xf8/0x170 do_el0_svc+0x170/0x240 el0_sync_handler+0x150/0x250 el0_sync+0x164/0x180 Suggested-by: Christoph Hellwig Signed-off-by: Qian Cai Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Signed-off-by: Darrick J. Wong Signed-off-by: Guo Xuenan Conflicts: fs/xfs/libxfs/xfs_da_btree.c Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/xfs/libxfs/xfs_da_btree.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c index 376bee94b5dd..2f0c9610dabc 100644 --- a/fs/xfs/libxfs/xfs_da_btree.c +++ b/fs/xfs/libxfs/xfs_da_btree.c @@ -1882,7 +1882,8 @@ xfs_da3_path_shift( ASSERT(path != NULL); ASSERT((path->active > 0) && (path->active < XFS_DA_NODE_MAXDEPTH)); level = (path->active-1) - 1; /* skip bottom layer in path */ - for (blk = &path->blk[level]; level >= 0; blk--, level--) { + for (; level >= 0; level--) { + blk = &path->blk[level]; node = blk->bp->b_addr; dp->d_ops->node_hdr_from_disk(&nodehdr, node); btree = dp->d_ops->node_tree_p(node); -- Gitee From 0213acd04df8410a630af3f97489117ce9c11c61 Mon Sep 17 00:00:00 2001 From: Luo Meng Date: Tue, 22 Mar 2022 13:53:11 +0000 Subject: [PATCH 091/340] io_uring: fix UAF in get_files_struct() hulk inclusion category: bugfix bugzilla: 186337, https://gitee.com/openeuler/kernel/issues/I4XA09 CVE: NA -------------------------------- If two tasks are running concurrently as follows: task1 | task2 io_uring_enter | io_wqe_worker io_submit_sqes | io_submit_sqe | io_queue_sqe | io_req_defer | io_req_defer_prep | io_prep_work_files | io_grab_files | req->work.files = current->files | io_queue_async_work | __io_queue_async_work | io_wq_enqueue | io_wqe_insert_work | | io_worker_handle_work | io_impersonate_work | current->files = work->files And then, one of the concurrency UAF can be shown as below: free use (task3 ls -l /proc/io_wqe_worker id/fd ) do_exit // tsk = current = work->files | exit_files | put_files_struct | tsk->files // tsk->files = work->files | | iterate_dir | proc_readfd_common | p = get_proc_task(file_inode(file)) | get_files_struct | files = task->files | atomic_inc(&files->count) The root cause of UAF bugs is when get req->work.files doesn't add refcount. The mainline commit 0f2122045b94(io_uring: don't rely on weak ->files references) fixes this problem, based on this commit to resolved the problme. 
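
Before the UBSAN report quoted next, a stand-alone sketch of the C rule involved (editorial illustration; the types are simplified): computing &array[-1] is undefined behaviour even if the pointer is never dereferenced, and the rewritten loop avoids it by taking the element address only for a non-negative index.

#include <stdio.h>

#define MAXDEPTH 5

struct blk {
	int blkno;
};

int main(void)
{
	struct blk path[MAXDEPTH] = { {10}, {11}, {12}, {13}, {14} };
	int level = -1;		/* what (active - 1) - 1 yields when active == 1 */

	/*
	 * Old shape:  for (blk = &path[level]; level >= 0; blk--, level--)
	 * With level == -1 the initializer already forms &path[-1], an
	 * out-of-range pointer, which is undefined behaviour even though
	 * it is never dereferenced.
	 *
	 * Patched shape: only index the array once level is known to be
	 * non-negative, so no such pointer is ever created.
	 */
	for (; level >= 0; level--) {
		struct blk *blk = &path[level];

		printf("visiting block %d\n", blk->blkno);
	}
	printf("done without forming an out-of-range pointer\n");
	return 0;
}
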
Signed-off-by: Luo Meng Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 7240b5423170..a21d1d67d725 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -5613,6 +5613,7 @@ static void __io_clean_op(struct io_kiocb *req) wake_up(&ctx->inflight_wait); spin_unlock_irqrestore(&ctx->inflight_lock, flags); req->flags &= ~REQ_F_INFLIGHT; + put_files_struct(req->work.files); } } @@ -5990,7 +5991,7 @@ static int io_grab_files(struct io_kiocb *req) if (fcheck(ctx->ring_fd) == ctx->ring_file) { list_add(&req->inflight_entry, &ctx->inflight_list); req->flags |= REQ_F_INFLIGHT; - req->work.files = current->files; + req->work.files = get_files_struct(current); ret = 0; } spin_unlock_irq(&ctx->inflight_lock); -- Gitee From b630f785e12895877121724de737020fff8f0f9e Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Tue, 22 Mar 2022 13:53:12 +0000 Subject: [PATCH 092/340] sock: remove one redundant SKB_FRAG_PAGE_ORDER macro mainline inclusion from mainline-v5.15-rc1 commit 723783d077e39c256a1fafebbd97cbb14207c28f category: bugfix bugzilla: 186409, https://gitee.com/openeuler/kernel/issues/I4Z0V2 CVE: CVE-2022-0886 -------------------------------- Both SKB_FRAG_PAGE_ORDER are defined to the same value in net/core/sock.c and drivers/vhost/net.c. Move the SKB_FRAG_PAGE_ORDER definition to net/core/sock.h, as both net/core/sock.c and drivers/vhost/net.c include it, and it seems a reasonable file to put the macro. Signed-off-by: Yunsheng Lin Acked-by: Jason Wang Signed-off-by: David S. Miller Signed-off-by: Xu Jia conflict: drivers/vhost/net.c Reviewed-by: Wei Yongjun Reviewed-by: Wang Weiyang Signed-off-by: Yongqiang Liu --- include/net/sock.h | 2 ++ net/core/sock.c | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index 803464e66e02..b001ddbed3f3 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2584,6 +2584,8 @@ extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; extern __u32 sysctl_rmem_default; +#define SKB_FRAG_PAGE_ORDER get_order(32768) + static inline int sk_get_wmem0(const struct sock *sk, const struct proto *proto) { /* Does this proto have per netns sysctl_wmem ? */ diff --git a/net/core/sock.c b/net/core/sock.c index 66a21528c739..246abac5e8d2 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2229,9 +2229,6 @@ static void sk_leave_memory_pressure(struct sock *sk) } } -/* On 32bit arches, an skb frag is limited to 2^15 */ -#define SKB_FRAG_PAGE_ORDER get_order(32768) - /** * skb_page_frag_refill - check that a page_frag contains enough room * @sz: minimum size of the fragment we want to get -- Gitee From 019f9120dc4f9e5598519eb65f824d8a588c6dd8 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Tue, 22 Mar 2022 13:53:13 +0000 Subject: [PATCH 093/340] esp: Fix possible buffer overflow in ESP transformation mainline inclusion from mainline commit ebe48d368e97d007bfeb76fcb065d6cfc4c96645 category: bugfix bugzilla: 186409, https://gitee.com/openeuler/kernel/issues/I4Z0V2 CVE: CVE-2022-0886 -------------------------------- The maximum message size that can be send is bigger than the maximum site that skb_page_frag_refill can allocate. So it is possible to write beyond the allocated buffer. Fix this by doing a fallback to COW in that case. v2: Avoid get get_order() costs as suggested by Linus Torvalds. 
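For a sense of the sizes involved, the arithmetic below is illustrative and assumes 4 KiB pages (the macros themselves are introduced in the hunks further down):

	/* SKB_FRAG_PAGE_ORDER  = get_order(32768)                 = 3
	 * ESP_SKB_FRAG_MAXSIZE = PAGE_SIZE << SKB_FRAG_PAGE_ORDER = 32768 bytes
	 *
	 * Once ALIGN(skb->data_len + tailen, L1_CACHE_BYTES) exceeds 32 KiB,
	 * the page-fragment path cannot hold the whole payload, so the
	 * output path must take the COW branch instead of appending frags.
	 */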
Fixes: cac2661c53f3 ("esp4: Avoid skb_cow_data whenever possible") Fixes: 03e2a30f6a27 ("esp6: Avoid skb_cow_data whenever possible") Reported-by: valis Signed-off-by: Steffen Klassert Signed-off-by: Xu Jia Reviewed-by: Wei Yongjun Reviewed-by: Wang Weiyang Signed-off-by: Yongqiang Liu --- include/net/esp.h | 2 ++ net/ipv4/esp4.c | 5 +++++ net/ipv6/esp6.c | 5 +++++ 3 files changed, 12 insertions(+) diff --git a/include/net/esp.h b/include/net/esp.h index 117652eb6ea3..465e38890ee9 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -4,6 +4,8 @@ #include +#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER) + struct ip_esp_hdr; static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 0792a9e2a555..45bb5b60b994 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -275,6 +275,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + unsigned int allocsz; /* this is non-NULL only with UDP Encapsulation */ if (x->encap) { @@ -284,6 +285,10 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * return err; } + allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES); + if (allocsz > ESP_SKB_FRAG_MAXSIZE) + goto cow; + if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { nfrags = 1; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 25317d5ccf2c..e3abfc97fde7 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -241,6 +241,11 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info struct page *page; struct sk_buff *trailer; int tailen = esp->tailen; + unsigned int allocsz; + + allocsz = ALIGN(skb->data_len + tailen, L1_CACHE_BYTES); + if (allocsz > ESP_SKB_FRAG_MAXSIZE) + goto cow; if (!skb_cloned(skb)) { if (tailen <= skb_tailroom(skb)) { -- Gitee From 04c20fc89eb4c7b9ce6ecc5695538f4ef3b45ac2 Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Wed, 23 Mar 2022 11:38:31 +0000 Subject: [PATCH 094/340] swiotlb: fix info leak with DMA_FROM_DEVICE mainline inclusion from mainline-v5.17-rc6 commit ddbd89deb7d32b1fbb879f48d68fda1a8ac58e8e category: bugfix bugzilla: 186478, https://gitee.com/openeuler/kernel/issues/I4Z86P CVE: CVE-2022-0854 -------------------------------- The problem I'm addressing was discovered by the LTP test covering cve-2018-1000204. A short description of what happens follows: 1) The test case issues a command code 00 (TEST UNIT READY) via the SG_IO interface with: dxfer_len == 524288, dxdfer_dir == SG_DXFER_FROM_DEV and a corresponding dxferp. The peculiar thing about this is that TUR is not reading from the device. 2) In sg_start_req() the invocation of blk_rq_map_user() effectively bounces the user-space buffer. As if the device was to transfer into it. Since commit a45b599ad808 ("scsi: sg: allocate with __GFP_ZERO in sg_build_indirect()") we make sure this first bounce buffer is allocated with GFP_ZERO. 3) For the rest of the story we keep ignoring that we have a TUR, so the device won't touch the buffer we prepare as if the we had a DMA_FROM_DEVICE type of situation. My setup uses a virtio-scsi device and the buffer allocated by SG is mapped by the function virtqueue_add_split() which uses DMA_FROM_DEVICE for the "in" sgs (here scatter-gather and not scsi generics). This mapping involves bouncing via the swiotlb (we need swiotlb to do virtio in protected guest like s390 Secure Execution, or AMD SEV). 
4) When the SCSI TUR is done, we first copy back the content of the second (that is swiotlb) bounce buffer (which most likely contains some previous IO data), to the first bounce buffer, which contains all zeros. Then we copy back the content of the first bounce buffer to the user-space buffer. 5) The test case detects that the buffer, which it zero-initialized, ain't all zeros and fails. One can argue that this is an swiotlb problem, because without swiotlb we leak all zeros, and the swiotlb should be transparent in a sense that it does not affect the outcome (if all other participants are well behaved). Copying the content of the original buffer into the swiotlb buffer is the only way I can think of to make swiotlb transparent in such scenarios. So let's do just that if in doubt, but allow the driver to tell us that the whole mapped buffer is going to be overwritten, in which case we can preserve the old behavior and avoid the performance impact of the extra bounce. Signed-off-by: Halil Pasic Signed-off-by: Christoph Hellwig Conflicts: Documentation/core-api/dma-attributes.rst include/linux/dma-mapping.h kernel/dma/swiotlb.c Signed-off-by: Liu Shixin Reviewed-by: Xiu Jianfeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- Documentation/DMA-attributes.txt | 8 ++++++++ include/linux/dma-mapping.h | 8 ++++++++ kernel/dma/swiotlb.c | 3 ++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index 8f8d97f65d73..34742fc00ec0 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -156,3 +156,11 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). + +DMA_ATTR_OVERWRITE +------------------ + +This is a hint to the DMA-mapping subsystem that the device is expected to +overwrite the entire mapped size, thus the caller does not require any of the +previous buffer contents to be preserved. This allows bounce-buffering +implementations to optimise DMA_FROM_DEVICE transfers. diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 9d39d97e09f4..aa23cade6991 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -70,6 +70,14 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) +/* + * This is a hint to the DMA-mapping subsystem that the device is expected + * to overwrite the entire mapped size, thus the caller does not require any + * of the previous buffer contents to be preserved. This allows + * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. + */ +#define DMA_ATTR_OVERWRITE (1UL << 10) + /* * A dma_addr_t can hold any valid DMA or bus address for the platform. * It can be given to a device to use as a DMA source or target. 
A CPU cannot diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 2a8c41f12d45..1624d12b60af 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -584,7 +584,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, for (i = 0; i < nslots; i++) io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) + (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE || + dir == DMA_BIDIRECTIONAL)) swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); return tlb_addr; -- Gitee From 3f80e18671eff79521b2eab06526446c40bf71bf Mon Sep 17 00:00:00 2001 From: Halil Pasic Date: Wed, 23 Mar 2022 11:38:32 +0000 Subject: [PATCH 095/340] swiotlb: rework "fix info leak with DMA_FROM_DEVICE" mainline inclusion from mainline-v5.17-rc8 commit aa6f8dcbab473f3a3c7454b74caa46d36cdc5d13 category: bugfix bugzilla: 186478, https://gitee.com/openeuler/kernel/issues/I4Z86P CVE: CVE-2022-0854 -------------------------------- Unfortunately, we ended up merging an old version of the patch "fix info leak with DMA_FROM_DEVICE" instead of merging the latest one. Christoph (the swiotlb maintainer), he asked me to create an incremental fix (after I have pointed this out the mix up, and asked him for guidance). So here we go. The main differences between what we got and what was agreed are: * swiotlb_sync_single_for_device is also required to do an extra bounce * We decided not to introduce DMA_ATTR_OVERWRITE until we have exploiters * The implantation of DMA_ATTR_OVERWRITE is flawed: DMA_ATTR_OVERWRITE must take precedence over DMA_ATTR_SKIP_CPU_SYNC Thus this patch removes DMA_ATTR_OVERWRITE, and makes swiotlb_sync_single_for_device() bounce unconditionally (that is, also when dir == DMA_TO_DEVICE) in order do avoid synchronising back stale data from the swiotlb buffer. Let me note, that if the size used with dma_sync_* API is less than the size used with dma_[un]map_*, under certain circumstances we may still end up with swiotlb not being transparent. In that sense, this is no perfect fix either. To get this bullet proof, we would have to bounce the entire mapping/bounce buffer. For that we would have to figure out the starting address, and the size of the mapping in swiotlb_sync_single_for_device(). While this does seem possible, there seems to be no firm consensus on how things are supposed to work. Signed-off-by: Halil Pasic Fixes: ddbd89deb7d3 ("swiotlb: fix info leak with DMA_FROM_DEVICE") Cc: stable@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Linus Torvalds Conflicts: Documentation/core-api/dma-attributes.rst include/linux/dma-mapping.h kernel/dma/swiotlb.c Signed-off-by: Liu Shixin Reviewed-by: Xiu Jianfeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- Documentation/DMA-attributes.txt | 8 -------- include/linux/dma-mapping.h | 8 -------- kernel/dma/swiotlb.c | 26 ++++++++++++++++---------- 3 files changed, 16 insertions(+), 26 deletions(-) diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index 34742fc00ec0..8f8d97f65d73 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -156,11 +156,3 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). 
- -DMA_ATTR_OVERWRITE ------------------- - -This is a hint to the DMA-mapping subsystem that the device is expected to -overwrite the entire mapped size, thus the caller does not require any of the -previous buffer contents to be preserved. This allows bounce-buffering -implementations to optimise DMA_FROM_DEVICE transfers. diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index aa23cade6991..9d39d97e09f4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -70,14 +70,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -/* - * This is a hint to the DMA-mapping subsystem that the device is expected - * to overwrite the entire mapped size, thus the caller does not require any - * of the previous buffer contents to be preserved. This allows - * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. - */ -#define DMA_ATTR_OVERWRITE (1UL << 10) - /* * A dma_addr_t can hold any valid DMA or bus address for the platform. * It can be given to a device to use as a DMA source or target. A CPU cannot diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 1624d12b60af..b6eee8ea867b 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -583,11 +583,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, */ for (i = 0; i < nslots; i++) io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE || - dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); - + /* + * When dir == DMA_FROM_DEVICE we could omit the copy from the orig + * to the tlb buffer, if we knew for sure the device will + * overwirte the entire current content. But we don't. Thus + * unconditional bounce may prevent leaking swiotlb content (i.e. + * kernel memory) to user-space. + */ + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); return tlb_addr; } @@ -679,11 +682,14 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, BUG_ON(dir != DMA_TO_DEVICE); break; case SYNC_FOR_DEVICE: - if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_TO_DEVICE); - else - BUG_ON(dir != DMA_FROM_DEVICE); + /* + * Unconditional bounce is necessary to avoid corruption on + * sync_*_for_cpu or dma_ummap_* when the device didn't overwrite + * the whole lengt of the bounce buffer. + */ + swiotlb_bounce(orig_addr, tlb_addr, + size, DMA_TO_DEVICE); + BUG_ON(!valid_dma_direction(dir)); break; default: BUG(); -- Gitee From c7c20ad04c0ff233149e7c5108344bfa19c88a3a Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 24 Mar 2022 04:19:04 +0000 Subject: [PATCH 096/340] hugetlb: Add huge page alloced limit hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4YTLN CVE: NA -------------------------------- The user wants to reserve a certain amount of memory for normal non-huge page, that is, the hugetlb can't allowed to use all the memory. Add a new kernel parameters "hugepage_prohibit_sz=" to set size for normal non-huge page reserved, and when alloc huge page, let's fail if the new allocating exceeds the limit. 
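As a usage sketch (the sizes here are made up for illustration, not taken from the patch), a boot command line that keeps 4 GiB for normal pages while still requesting boot-time huge pages could look like:

    hugepage_prohibit_sz=4G hugepagesz=2M hugepages=2048

With such a setting, huge page allocation is refused as soon as it would leave less than 4 GiB of usable normal memory, rather than consuming everything that is left.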
Signed-off-by: Kefeng Wang Signed-off-by: Peng Liu Reviewed-by: Chen Wandun Signed-off-by: Yongqiang Liu --- .../admin-guide/kernel-parameters.txt | 7 ++ mm/hugetlb.c | 73 +++++++++++++++++++ 2 files changed, 80 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2cb6c844d751..170e25b192a1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1405,6 +1405,13 @@ hpet_mmap= [X86, HPET_MMAP] Allow userspace to mmap HPET registers. Default set by CONFIG_HPET_MMAP_DEFAULT. + hugepage_prohibit_sz= + [HW] HugeTLB pages should not alloc when the rest of + the normal pages less than hugepage_prohibit_sz. This + setting is to make sure a system can start even when + part of physical memory is broken, admin users can + adjust this according to typical environment. + hugepages= [HW,X86-32,IA-64] HugeTLB pages to allocate at boot. If using node format, the number of pages to allocate per-node can be specified. diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 68cec97bbd06..5b6a050fd6c1 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -1444,6 +1444,33 @@ pgoff_t hugetlb_basepage_index(struct page *page) return (index << compound_order(page_head)) + compound_idx; } +#define HUGE_PAGE_BOOTMEM_ALLOC 0 +#define HUGE_PAGE_FRESH_ALLOC 1 + +static u64 normal_page_reserve_sz; + +static int __init early_normal_page_reserve(char *p) +{ + unsigned long long size; + + if (!p) + return 1; + + size = memparse(p, &p); + if (*p) { + pr_warn("HugeTLB: Invalid normal page reserved size\n"); + return 1; + } + + normal_page_reserve_sz = size & PAGE_MASK; + + pr_info("HugeTLB: Normal page reserved %lldMB\n", + normal_page_reserve_sz >> 20); + + return 0; +} +early_param("hugepage_prohibit_sz", early_normal_page_reserve); + static struct page *alloc_buddy_huge_page(struct hstate *h, gfp_t gfp_mask, int nid, nodemask_t *nmask, nodemask_t *node_alloc_noretry) @@ -1491,6 +1518,45 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, return page; } +static bool __ref huge_page_limit_check(int type, size_t hsize, int nid) +{ + u64 mem_usable = 0; + char *str = NULL; + char buf[32]; + + if (!normal_page_reserve_sz) + return true; + + if (system_state > SYSTEM_SCHEDULING) + return true; + + if (normal_page_reserve_sz >= memblock_phys_mem_size()) { + mem_usable = memblock_phys_mem_size(); + str = "physical memory"; + goto out; + } + + if (type == HUGE_PAGE_BOOTMEM_ALLOC) { + mem_usable = memblock_phys_mem_size() - memblock_reserved_size(); + str = "memblock usable"; + } else if (type == HUGE_PAGE_FRESH_ALLOC) { + mem_usable = nr_free_pages() << PAGE_SHIFT; + str = "free page"; + } + + if (mem_usable < normal_page_reserve_sz + hsize) + goto out; + + return true; +out: + string_get_size(hsize, 1, STRING_UNITS_2, buf, 32); + pr_info("HugeTLB: allocating(%s) + Normal pages reserved(%lldMB) node%d exceed %s size(%lldMB)\n", + buf, normal_page_reserve_sz >> 20, + nid, str, mem_usable >> 20); + + return false; +} + /* * Common helper to allocate a fresh hugetlb page. 
All specific allocators * should use this function to get new hugetlb pages @@ -1501,6 +1567,9 @@ static struct page *alloc_fresh_huge_page(struct hstate *h, { struct page *page; + if (!huge_page_limit_check(HUGE_PAGE_FRESH_ALLOC, huge_page_size(h), nid)) + return NULL; + if (hstate_is_gigantic(h)) page = alloc_gigantic_page(h, gfp_mask, nid, nmask); else @@ -2251,6 +2320,10 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid) if (nid != NUMA_NO_NODE && nid >= nr_online_nodes) return 0; + + if (!huge_page_limit_check(HUGE_PAGE_BOOTMEM_ALLOC, huge_page_size(h), nid)) + return 0; + /* do node specific alloc */ if (nid != NUMA_NO_NODE) { m = memblock_virt_alloc_try_nid_raw(huge_page_size(h), huge_page_size(h), -- Gitee From b253ac1c874e3fa28ffbd41a724420a784c9062d Mon Sep 17 00:00:00 2001 From: Haimin Zhang Date: Thu, 24 Mar 2022 04:19:05 +0000 Subject: [PATCH 097/340] block-map: add __GFP_ZERO flag for alloc_page in function bio_copy_kern mainline inclusion from mainline-v5.17-rc5 commit cc8f7fe1f5eab010191aa4570f27641876fa1267 category: bugfix bugzilla: 186474, https://gitee.com/openeuler/kernel/issues/I4Z2LA CVE: CVE-2022-0494 -------------------------------- Add __GFP_ZERO flag for alloc_page in function bio_copy_kern to initialize the buffer of a bio. Signed-off-by: Haimin Zhang Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220216084038.15635-1-tcs.kernel@gmail.com Signed-off-by: Jens Axboe Conflict: commit ce288e053568 ("block: remove BLK_BOUNCE_ISA support") is not backported. Signed-off-by: Yu Kuai Reviewed-by: Jason Yan Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- block/bio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio.c b/block/bio.c index b50d3b59c79b..6457cbfa70cc 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1528,7 +1528,7 @@ struct bio *bio_copy_kern(struct request_queue *q, void *data, unsigned int len, if (bytes > len) bytes = len; - page = alloc_page(q->bounce_gfp | gfp_mask); + page = alloc_page(q->bounce_gfp | __GFP_ZERO | gfp_mask); if (!page) goto cleanup; -- Gitee From 11e2bfddce4a40a3c996f1eef7e563c03b16839c Mon Sep 17 00:00:00 2001 From: Xingang Wang Date: Sat, 26 Mar 2022 09:38:38 +0800 Subject: [PATCH 098/340] arm64/mpam: fix __mpam_device_create() section mismatch error ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I49RB2 CVE: NA --------------------------------------------------- Fix modpost Section mismatch error in __mpam_device_create() and others. These warnings will occur in high version gcc, for example 10.1.0. [...] WARNING: vmlinux.o(.text+0x2ed88): Section mismatch in reference from the function __mpam_device_create() to the function .init.text:mpam_device_alloc() The function __mpam_device_create() references the function __init mpam_device_alloc(). This is often because __mpam_device_create lacks a __init annotation or the annotation of mpam_device_alloc is wrong. WARNING: vmlinux.o(.text.unlikely+0xa5c): Section mismatch in reference from the function mpam_resctrl_init() to the function .init.text:mpam_init_padding() The function mpam_resctrl_init() references the function __init mpam_init_padding(). This is often because mpam_resctrl_init lacks a __init annotation or the annotation of mpam_init_padding is wrong. 
WARNING: vmlinux.o(.text.unlikely+0x5a9c): Section mismatch in reference from the function resctrl_group_init() to the function .init.text:resctrl_group_setup_root() The function resctrl_group_init() references the function __init resctrl_group_setup_root(). This is often because resctrl_group_init lacks a __init annotation or the annotation of resctrl_group_setup_root is wrong. [...] Fixes: c5e27c395a70 ("arm64/mpam: remove __init macro to support driver probe") Signed-off-by: Xingang Wang Signed-off-by: Wang ShaoBo Reviewed-by: Cheng Jian Signed-off-by: Laibin Qiu --- arch/arm64/kernel/mpam/mpam_device.c | 8 ++++---- arch/arm64/kernel/mpam/mpam_resctrl.c | 2 +- fs/resctrlfs.c | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/mpam/mpam_device.c b/arch/arm64/kernel/mpam/mpam_device.c index 4e882e81cf41..5d34751f8c3e 100644 --- a/arch/arm64/kernel/mpam/mpam_device.c +++ b/arch/arm64/kernel/mpam/mpam_device.c @@ -621,7 +621,7 @@ static void mpam_failed(struct work_struct *work) mutex_unlock(&mpam_cpuhp_lock); } -static struct mpam_device * __init +static struct mpam_device * mpam_device_alloc(struct mpam_component *comp) { struct mpam_device *dev; @@ -656,7 +656,7 @@ static void mpam_devices_destroy(struct mpam_component *comp) } } -static struct mpam_component * __init mpam_component_alloc(int id) +static struct mpam_component *mpam_component_alloc(int id) { struct mpam_component *comp; @@ -694,7 +694,7 @@ struct mpam_component *mpam_component_get(struct mpam_class *class, int id, return comp; } -static struct mpam_class * __init mpam_class_alloc(u8 level_idx, +static struct mpam_class *mpam_class_alloc(u8 level_idx, enum mpam_class_types type) { struct mpam_class *class; @@ -733,7 +733,7 @@ static void mpam_class_destroy(struct mpam_class *class) } } -static struct mpam_class * __init mpam_class_get(u8 level_idx, +static struct mpam_class *mpam_class_get(u8 level_idx, enum mpam_class_types type, bool alloc) { diff --git a/arch/arm64/kernel/mpam/mpam_resctrl.c b/arch/arm64/kernel/mpam/mpam_resctrl.c index fe2dcf92100f..183c83d4274f 100644 --- a/arch/arm64/kernel/mpam/mpam_resctrl.c +++ b/arch/arm64/kernel/mpam/mpam_resctrl.c @@ -1135,7 +1135,7 @@ void closid_free(int closid) * Choose a width for the resource name and resource data based on the * resource that has widest name and cbm. */ -static __init void mpam_init_padding(void) +static void mpam_init_padding(void) { int cl; struct mpam_resctrl_res *res; diff --git a/fs/resctrlfs.c b/fs/resctrlfs.c index ea9df7d77b95..0e6753012140 100644 --- a/fs/resctrlfs.c +++ b/fs/resctrlfs.c @@ -989,7 +989,7 @@ static void resctrl_group_default_init(struct resctrl_group *r) r->type = RDTCTRL_GROUP; } -static int __init resctrl_group_setup_root(void) +static int resctrl_group_setup_root(void) { int ret; -- Gitee From 33dea90eb1047c6648ca441e15312996efd6e3fe Mon Sep 17 00:00:00 2001 From: Xingang Wang Date: Sat, 26 Mar 2022 09:38:39 +0800 Subject: [PATCH 099/340] arm64/mpam: refactor device tree structure to support multiple devices ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I49RB2 CVE: NA --------------------------------------------------- The process of MPAM device tree initialization is like this: arm_mpam_device_probe() // driver probe mpam_discovery_start() // start discover mpam devices [...] // find and add mpam devices mpam_discovery_complete() // trigger mpam_enable When there are multiple mpam device nodes, the driver probe procedure will execute more than once. 
However, the mpam_discovery_start() and mpam_discovery_complete() should only run once. Besides, the start should run first, and the complete should run after all devices added. So we reorganize the device tree structure, so that there will be only one mpam device parent nodes, and the probe procedure will only run once. We add the child node to represent the mpam devices, and traverse and add all mpam devices in the middle procedure of driver probe. Signed-off-by: Xingang Wang Signed-off-by: Wang ShaoBo Reviewed-by: Cheng Jian Signed-off-by: Laibin Qiu --- arch/arm64/kernel/mpam/mpam_device.c | 59 +++++++++++++++------------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/arch/arm64/kernel/mpam/mpam_device.c b/arch/arm64/kernel/mpam/mpam_device.c index 5d34751f8c3e..6455c69f132f 100644 --- a/arch/arm64/kernel/mpam/mpam_device.c +++ b/arch/arm64/kernel/mpam/mpam_device.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "mpam_resource.h" #include "mpam_device.h" @@ -1718,10 +1719,9 @@ static const struct of_device_id arm_mpam_of_device_ids[] = { { } }; -static int of_mpam_parse_irq(struct platform_device *pdev, +static int of_mpam_parse_irq(struct device_node *node, struct mpam_device *dev) { - struct device_node *node = pdev->dev.of_node; u32 overflow_interrupt, overflow_flags; u32 error_interrupt, error_interrupt_flags; @@ -1736,12 +1736,12 @@ static int of_mpam_parse_irq(struct platform_device *pdev, error_interrupt, error_interrupt_flags); } -static int of_mpam_parse_cache(struct platform_device *pdev) +static int of_mpam_parse_cache(struct platform_device *pdev, + struct device_node *node) { struct mpam_device *dev; - struct device_node *node = pdev->dev.of_node; int cache_level, cache_id; - struct resource *res; + u64 reg_value[2]; if (of_property_read_u32(node, "cache-level", &cache_level)) { dev_err(&pdev->dev, "missing cache level property\n"); @@ -1754,27 +1754,27 @@ static int of_mpam_parse_cache(struct platform_device *pdev) } /* Base address */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { + if (of_property_read_u64_array(node, "reg", reg_value, 2)) { dev_err(&pdev->dev, "missing io resource property\n"); return -EINVAL; } - dev = mpam_device_create_cache(cache_level, cache_id, NULL, res->start); + dev = mpam_device_create_cache(cache_level, cache_id, NULL, + reg_value[0]); if (IS_ERR(dev)) { dev_err(&pdev->dev, "Failed to create cache node\n"); return -EINVAL; } - return of_mpam_parse_irq(pdev, dev); + return of_mpam_parse_irq(node, dev); } -static int of_mpam_parse_memory(struct platform_device *pdev) +static int of_mpam_parse_memory(struct platform_device *pdev, + struct device_node *node) { struct mpam_device *dev; - struct device_node *node = pdev->dev.of_node; int numa_id; - struct resource *res; + u64 reg_value[2]; if (of_property_read_u32(node, "numa-node-id", &numa_id)) { dev_err(&pdev->dev, "missing numa node id property\n"); @@ -1782,40 +1782,35 @@ static int of_mpam_parse_memory(struct platform_device *pdev) } /* Base address */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { + if (of_property_read_u64_array(node, "reg", reg_value, 2)) { dev_err(&pdev->dev, "missing io resource property\n"); return -EINVAL; } - dev = mpam_device_create_memory(numa_id, res->start); + dev = mpam_device_create_memory(numa_id, reg_value[0]); if (IS_ERR(dev)) { dev_err(&pdev->dev, "Failed to create memory node\n"); return -EINVAL; } - return of_mpam_parse_irq(pdev, dev); + return of_mpam_parse_irq(node, dev); } 
-static int of_mpam_parse(struct platform_device *pdev) +static int of_mpam_add_child(struct platform_device *pdev, + struct device_node *node) { - struct device *dev = &pdev->dev; - struct device_node *node = dev->of_node; enum mpam_class_types type; - if (!node || !of_match_node(arm_mpam_of_device_ids, pdev->dev.of_node)) - return -EINVAL; - - if (of_property_read_u32(dev->of_node, "type", &type)) { - dev_err(dev, "missing type property\n"); + if (of_property_read_u32(node, "type", &type)) { + dev_err(&pdev->dev, "missing type property\n"); return -EINVAL; } switch (type) { case MPAM_CLASS_CACHE: - return of_mpam_parse_cache(pdev); + return of_mpam_parse_cache(pdev, node); case MPAM_CLASS_MEMORY: - return of_mpam_parse_memory(pdev); + return of_mpam_parse_memory(pdev, node); default: pr_warn_once("Unknown node type %u.\n", type); return -EINVAL; @@ -1833,6 +1828,9 @@ static int of_mpam_parse(struct platform_device *pdev) static int arm_mpam_device_probe(struct platform_device *pdev) { int ret; + struct device *dev = &pdev->dev; + struct device_node *node = dev->of_node; + struct device_node *child = NULL; if (!cpus_have_const_cap(ARM64_HAS_MPAM)) return 0; @@ -1840,11 +1838,18 @@ static int arm_mpam_device_probe(struct platform_device *pdev) if (!acpi_disabled || mpam_enabled != MPAM_ENABLE_OF) return 0; + if (!node || !of_match_node(arm_mpam_of_device_ids, pdev->dev.of_node)) + return -EINVAL; + ret = mpam_discovery_start(); if (ret) return ret; - ret = of_mpam_parse(pdev); + for_each_available_child_of_node(node, child) { + ret = of_mpam_add_child(pdev, child); + if (ret) + break; + } if (ret) { mpam_discovery_failed(); -- Gitee From 880b19479afc21334003024addc19e8a8fedf735 Mon Sep 17 00:00:00 2001 From: Xingang Wang Date: Sat, 26 Mar 2022 09:38:40 +0800 Subject: [PATCH 100/340] dt-bindings: mpam: refactor device tree node structure arm64/mpam: refactor device tree structure to support multiple devices ascend inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I49RB2 CVE: NA --------------------------------------------------- To support multiple mpam device nodes, all nodes should be organized as child of the same parent nodes. This makes sure that the mpam discovery start and complete procedure in the right execution order. Add modification in the devicetree documentation to record this. Signed-off-by: Xingang Wang Signed-off-by: Wang ShaoBo Reviewed-by: Cheng Jian Signed-off-by: Laibin Qiu --- .../devicetree/bindings/arm/arm,mpam.txt | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/Documentation/devicetree/bindings/arm/arm,mpam.txt b/Documentation/devicetree/bindings/arm/arm,mpam.txt index 65c1e6809685..e9ba09bb3159 100644 --- a/Documentation/devicetree/bindings/arm/arm,mpam.txt +++ b/Documentation/devicetree/bindings/arm/arm,mpam.txt @@ -28,27 +28,30 @@ and Monitoring (MPAM), for Armv8-A", MPAM interrupts(section 8.8). 
Example: -mpam_memory0 { +mpam { compatible = "arm,mpam"; - reg = <0x0 0x10000000 0x0 0x10000>; - type = <2>; /* memory type */ - numa-node-id = <0>; - overflow-interrupt = <0>; - overflow-flags = <0>; - error-interrupt = <0>; - error-interrupt-flags = <0>; - not-ready-max = <0>; -}; -mpam_cache0 { - compatible = "arm,mpam"; - reg = <0x0 0x20000000 0x0 0x10000>; - type = <1>; /* cache type */ - cache-id = <0>; - cache-level = <3>; - overflow-interrupt = <0>; - overflow-flags = <0>; - error-interrupt = <0>; - error-interrupt-flags = <0>; - not-ready-max = <0>; + mpam_memory0 { + reg = <0x0 0x10000000 0x0 0x10000>; + type = <2>; /* memory type */ + numa-node-id = <0>; + overflow-interrupt = <0>; + overflow-flags = <0>; + error-interrupt = <0>; + error-interrupt-flags = <0>; + not-ready-max = <0>; + }; + + mpam_cache0 { + reg = <0x0 0x20000000 0x0 0x10000>; + type = <1>; /* cache type */ + cache-id = <0>; + cache-level = <3>; + overflow-interrupt = <0>; + overflow-flags = <0>; + error-interrupt = <0>; + error-interrupt-flags = <0>; + not-ready-max = <0>; + }; + }; -- Gitee From e79aa56b54e9bf6a2f314961c9872f71634f040c Mon Sep 17 00:00:00 2001 From: Wang ShaoBo Date: Sat, 26 Mar 2022 09:38:41 +0800 Subject: [PATCH 101/340] arm64/mpam: realign step entry when traversing rmid_transform hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I4SE03 CVE: NA --------------------------------------------------- This makes step entry aligned with step_size*step_cnt but not step_size, and check for alignment before traversing rmid_transform. When modifying rmid with a value not aligned with step_size*step_cnt, for_each_rmid_transform_point_step_from might miss next step point if it has been occupied in case step_cnt or step_size not equals to 1, which will cause the actual allocated rmid to be inconsistent with the expected one. Fixes: b47b9a817da1 ("arm64/mpam: rmid: refine allocation and release process") Signed-off-by: Wang ShaoBo Reviewed-by: Cheng Jian Signed-off-by: Laibin Qiu --- arch/arm64/kernel/mpam/mpam_resctrl.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/mpam/mpam_resctrl.c b/arch/arm64/kernel/mpam/mpam_resctrl.c index 183c83d4274f..60d3d8706a38 100644 --- a/arch/arm64/kernel/mpam/mpam_resctrl.c +++ b/arch/arm64/kernel/mpam/mpam_resctrl.c @@ -839,7 +839,8 @@ static inline unsigned long **__rmid_remap_bmp(u32 col) #define __step_xy_initialize(step, x, y, from) \ (x = from, step = 1, y = 0) #define __step_align(from) \ - (!(from % rmid_remap_matrix.step_size)) + (!(from % (rmid_remap_matrix.step_size * \ + rmid_remap_matrix.step_cnt))) #define __step_overflow(step) \ (__xy_overflow(x, y) || \ (step > rmid_remap_matrix.step_cnt)) @@ -913,7 +914,7 @@ static int is_rmid_remap_bmp_full(unsigned long *bmp) bitmap_full(bmp, rmid_remap_matrix.rows)); } -static int rmid_remap_bmp_find_first_avail_partid(int partid) +static int rmid_remap_bmp_find_step_entry(int partid) { int x, y; unsigned long **bmp; @@ -922,17 +923,18 @@ static int rmid_remap_bmp_find_first_avail_partid(int partid) rmid_remap_matrix.cols) return 0; + /* step entry should be non-occupied and aligned */ bmp = __rmid_remap_bmp(partid); - if (bmp && !is_rmid_remap_bmp_occ(*bmp)) - return partid; + if (bmp) + return (is_rmid_remap_bmp_occ(*bmp) || + !__step_align(partid)) ? -ENOSPC : partid; for_each_rmid_transform_point_from(bmp, x, y, 0) { /* * do not waste partid resource, start - * from step_size aligned position. 
+ * from step aligned position. */ - if (!is_rmid_remap_bmp_occ(*bmp) && - (x % rmid_remap_matrix.step_size) == 0) + if (__step_align(x) && !is_rmid_remap_bmp_occ(*bmp)) return x; } @@ -1026,8 +1028,8 @@ static int __rmid_alloc(int partid, int pmg) if (pmg >= 0) checkpmg = true; - /* traverse from first non-occupied and step_size aligned entry */ - ret = rmid_remap_bmp_find_first_avail_partid(partid); + /* traverse from first non-occupied and step-aligned entry */ + ret = rmid_remap_bmp_find_step_entry(partid); if (ret < 0) goto out; partid = ret; -- Gitee From 2b491a2e84f8f1b5c8cbd4715ea52129ebd88ac4 Mon Sep 17 00:00:00 2001 From: Zheng Yejian Date: Sat, 26 Mar 2022 10:50:09 +0000 Subject: [PATCH 102/340] livepatch/arm64: Fix incorrect endian conversion when long jump hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4ZIB2 CVE: NA -------------------------------- Kernel panic happened on 'arm64 big endian' board after calling function that has been live-patched. It can be reproduced as follows: 1. Insert 'livepatch-sample.ko' to patch kernel function 'cmdline_proc_show'; 2. Enable patch by execute: echo 1 > /sys/kernel/livepatch/livepatch-sample/enabled 3. Call 'cmdline_proc_show' by execute: cat /proc/cmdline 4. Then we get following panic logs: > kernel BUG at arch/arm64/kernel/traps.c:408! > Internal error: Oops - BUG: 0 [#1] SMP > Modules linked in: dump_mem(OE) livepatch_cmdline1(OEK) hi1382_gmac(OE) > [last unloaded: dump_mem] > CPU: 3 PID: 1752 Comm: cat Session: 0 Tainted: G OE K > 5.10.0+ #2 > Hardware name: Hisilicon PhosphorHi1382 (DT) > pstate: 00000005 (nzcv daif -PAN -UAO -TCO BTYPE=--) > pc : do_undefinstr+0x23c/0x2b4 > lr : do_undefinstr+0x5c/0x2b4 > sp : ffffffc010ac3a80 > x29: ffffffc010ac3a80 x28: ffffff82eb0a8000 > x27: 0000000000000000 x26: 0000000000000001 > x25: 0000000000000000 x24: 0000000000001000 > x23: 0000000000000000 x22: ffffffd0e0f16000 > x21: ffffffd0e0ae7000 x20: ffffffc010ac3b00 > x19: 0000000000021fd6 x18: ffffffd0e04aad94 > x17: 0000000000000000 x16: 0000000000000000 > x15: ffffffd0e04b519c x14: 0000000000000000 > x13: 0000000000000000 x12: 0000000000000000 > x11: 0000000000000000 x10: 0000000000000000 > x9 : 0000000000000000 x8 : 0000000000000000 > x7 : 0000000000000000 x6 : ffffffd0e0f16100 > x5 : 0000000000000000 x4 : 00000000d5300000 > x3 : 0000000000000000 x2 : ffffffd0e0f160f0 > x1 : ffffffd0e0f16103 x0 : 0000000000000005 > Call trace: > do_undefinstr+0x23c/0x2b4 > el1_undef+0x2c/0x44 > el1_sync_handler+0xa4/0xb0 > el1_sync+0x74/0x100 > cmdline_proc_show+0xc/0x44 > proc_reg_read_iter+0xb0/0xc4 > new_sync_read+0x10c/0x15c > vfs_read+0x144/0x18c > ksys_read+0x78/0xe8 > __arm64_sys_read+0x24/0x30 We compare first 6 instructions of 'cmdline_proc_show' before and after patch (see below). There are 4 instructions modified, so this is case that offset between old and new function is out of 128M. And we found that instruction at 'cmdline_proc_show+0xc' seems incorrect (it expects to be '00021fd6'). origin: patched: -------- -------- fd7bbea9 929ff7f0 21d500f0 f2a91b30 fd030091 f2d00010 211040f9 d61f0200 <-- cmdline_proc_show+0xc (expect is '00021fd6') f30b00f9 f30b00f9 f30300aa f30300aa It is caused by an incorrect big-to-little endian conversion, and we correct it. 
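The sketch below spells out the double conversion; it is illustrative and assumes the usual behaviour of aarch64_insn_patch_text_nosync(), which already converts the CPU-endian instruction value to little-endian when writing it to memory:

	u32 insn = 0xd61f0200;	/* br x16, as a CPU-endian value */

	/* correct: the helper performs the memory-order conversion itself */
	aarch64_insn_patch_text_nosync((void *)pc, insn);

	/* broken on big-endian kernels: cpu_to_le32() swaps the bytes here
	 * and the helper swaps them again on write, so the text section
	 * ends up holding a different, undefined instruction encoding */
	aarch64_insn_patch_text_nosync((void *)pc, cpu_to_le32(insn));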
Fixes: 5aa9a1a3488b ("livepatch/arm64: support livepatch without ftrace") Signed-off-by: Zheng Yejian Reviewed-by: Kuohai Xu Signed-off-by: Yongqiang Liu --- arch/arm64/kernel/livepatch.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/livepatch.c b/arch/arm64/kernel/livepatch.c index d966d4767cb7..235e6f8b6719 100644 --- a/arch/arm64/kernel/livepatch.c +++ b/arch/arm64/kernel/livepatch.c @@ -251,10 +251,10 @@ int arch_klp_patch_func(struct klp_func *func) if (aarch64_insn_patch_text_nosync((void *)pc, insn)) goto ERR_OUT; } else { - insns[0] = cpu_to_le32(0x92800010 | (((~new_addr) & 0xffff)) << 5); - insns[1] = cpu_to_le32(0xf2a00010 | (((new_addr >> 16) & 0xffff)) << 5); - insns[2] = cpu_to_le32(0xf2c00010 | (((new_addr >> 32) & 0xffff)) << 5); - insns[3] = cpu_to_le32(0xd61f0200); + insns[0] = 0x92800010 | (((~new_addr) & 0xffff)) << 5; + insns[1] = 0xf2a00010 | (((new_addr >> 16) & 0xffff)) << 5; + insns[2] = 0xf2c00010 | (((new_addr >> 32) & 0xffff)) << 5; + insns[3] = 0xd61f0200; for (i = 0; i < LJMP_INSN_SIZE; i++) { if (aarch64_insn_patch_text_nosync(((u32 *)pc) + i, insns[i])) goto ERR_OUT; @@ -328,10 +328,10 @@ void arch_klp_unpatch_func(struct klp_func *func) aarch64_insn_patch_text_nosync((void *)pc, insn); } else { - insns[0] = cpu_to_le32(0x92800010 | (((~new_addr) & 0xffff)) << 5); - insns[1] = cpu_to_le32(0xf2a00010 | (((new_addr >> 16) & 0xffff)) << 5); - insns[2] = cpu_to_le32(0xf2c00010 | (((new_addr >> 32) & 0xffff)) << 5); - insns[3] = cpu_to_le32(0xd61f0200); + insns[0] = 0x92800010 | (((~new_addr) & 0xffff)) << 5; + insns[1] = 0xf2a00010 | (((new_addr >> 16) & 0xffff)) << 5; + insns[2] = 0xf2c00010 | (((new_addr >> 32) & 0xffff)) << 5; + insns[3] = 0xd61f0200; for (i = 0; i < LJMP_INSN_SIZE; i++) aarch64_insn_patch_text_nosync(((u32 *)pc) + i, insns[i]); -- Gitee From 875ffd41499ee5a3512da409cbd4c2ffd32b3cfa Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Wed, 30 Mar 2022 03:33:02 +0000 Subject: [PATCH 103/340] mm: Do limit checking after memory allocation for memory reliable hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- Previous limit checking are done before real memory allocation. This will lead to some steps in __alloc_pages_slowpath() unreached(kswapd, direct compact and so on). Now limit checking are done in the end of __alloc_pages_nodemask(). Pages will be released if this memory allocation is stopped by limit checking. Memory allocation will fallback to movable zone if one of the following conditions are met: - memory reliable fallback is enabled - global init process Memory allocation with __GFP_NOFAIL will not check any limit. If memory reliable fallback is disabled and gfp flag does not contains any of the following flags: - __GFP_NORETRY - __GFP_RETRY_MAYFAIL - __GFP_THISNODE Or the following conditions are false - current->flags & PF_DUMPCORE - order > PAGE_ALLOC_COSTLY_ORDER Oom will occur to release some memory. 
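In rough pseudo-code (a simplification, not a literal copy of the hunks below), the allocation entry point now behaves like this:

	/* __alloc_pages_nodemask(), heavily simplified */
	prepare_before_alloc(&gfp_mask);	/* may add ___GFP_RELIABILITY */
retry:
	/* ... usual fast path and, if needed, __alloc_pages_slowpath() ... */
	if (check_after_alloc(&gfp_mask, order, preferred_nid, nodemask, &page))
		goto retry;	/* limit hit: any reliable page was freed,
				 * ___GFP_RELIABILITY is cleared and the
				 * retry may fall back to the movable zone */
	return page;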
Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/page_alloc.c | 194 +++++++++++++++++++----------------------------- 1 file changed, 76 insertions(+), 118 deletions(-) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f4d4716b9049..fa1cdeba5293 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3671,60 +3671,6 @@ __alloc_pages_cpuset_fallback(gfp_t gfp_mask, unsigned int order, return page; } -#ifdef CONFIG_MEMORY_RELIABLE -static inline void reliable_fb_find_zone(gfp_t gfp_mask, - struct alloc_context *ac) -{ - if (!reliable_allow_fb_enabled()) - return; - - /* dst node don't have zone we want, fallback here */ - if ((gfp_mask & __GFP_THISNODE) && (ac->high_zoneidx == ZONE_NORMAL) && - (gfp_mask & ___GFP_RELIABILITY)) { - ac->high_zoneidx = gfp_zone(gfp_mask & ~___GFP_RELIABILITY); - ac->preferred_zoneref = first_zones_zonelist( - ac->zonelist, ac->high_zoneidx, ac->nodemask); - } - - return; -} - -static inline struct page * -reliable_fb_before_oom(gfp_t gfp_mask, int order, - const struct alloc_context *ac) -{ - if (!reliable_allow_fb_enabled()) - return NULL; - - /* key user process alloc mem from movable zone to avoid oom */ - if ((ac->high_zoneidx == ZONE_NORMAL) && - (gfp_mask & ___GFP_RELIABILITY)) { - struct alloc_context tmp_ac = *ac; - - tmp_ac.high_zoneidx = ZONE_MOVABLE; - tmp_ac.preferred_zoneref = first_zones_zonelist( - ac->zonelist, ZONE_MOVABLE, ac->nodemask); - return get_page_from_freelist( - (gfp_mask | __GFP_HARDWALL) & ~__GFP_DIRECT_RECLAIM, - order, ALLOC_WMARK_HIGH | ALLOC_CPUSET, &tmp_ac); - } - - return NULL; -} -#else -static inline void reliable_fb_find_zone(gfp_t gfp_mask, - struct alloc_context *ac) -{ - return; -} - -static inline struct page *reliable_fb_before_oom(gfp_t gfp_mask, int order, - const struct alloc_context *ac) -{ - return NULL; -} -#endif - static inline struct page * __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, const struct alloc_context *ac, unsigned long *did_some_progress) @@ -3763,10 +3709,6 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, if (page) goto out; - page = reliable_fb_before_oom(gfp_mask, order, ac); - if (page) - goto out; - /* Coredumps can quickly deplete all memory reserves */ if (current->flags & PF_DUMPCORE) goto out; @@ -4374,12 +4316,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, */ ac->preferred_zoneref = first_zones_zonelist(ac->zonelist, ac->high_zoneidx, ac->nodemask); - if (!ac->preferred_zoneref->zone) { - reliable_fb_find_zone(gfp_mask, ac); - - if (!ac->preferred_zoneref->zone) - goto nopage; - } + if (!ac->preferred_zoneref->zone) + goto nopage; if (gfp_mask & __GFP_KSWAPD_RECLAIM) wake_all_kswapds(order, gfp_mask, ac); @@ -4638,74 +4576,94 @@ static inline void finalise_ac(gfp_t gfp_mask, struct alloc_context *ac) ac->high_zoneidx, ac->nodemask); } -/* - * return false means this allocation is limit by reliable user limit and - * this will lead to pagefault_out_of_memory() - */ -static inline bool prepare_before_alloc(gfp_t *gfp_mask, unsigned int order) +static inline void prepare_before_alloc(gfp_t *gfp_mask) { gfp_t gfp_ori = *gfp_mask; + bool zone_movable; + *gfp_mask &= gfp_allowed_mask; if (!mem_reliable_is_enabled()) - return true; + return; - if (*gfp_mask & __GFP_NOFAIL) - return true; + /* + * memory reliable only handle memory allocation from movable zone + * (force alloc from non-movable zone or force alloc from movable + * zone) to get total isolation. 
+ */ + zone_movable = gfp_zone(*gfp_mask) == ZONE_MOVABLE; + if (!zone_movable) + return; if (gfp_ori & ___GFP_RELIABILITY) { - if (!(gfp_ori & __GFP_HIGHMEM) || !(gfp_ori & __GFP_MOVABLE)) - return true; + *gfp_mask |= ___GFP_RELIABILITY; + return; + } - if (mem_reliable_watermark_ok(1 << order)) { - *gfp_mask |= ___GFP_RELIABILITY; - return true; - } + if (is_global_init(current) || (current->flags & PF_RELIABLE)) + *gfp_mask |= ___GFP_RELIABILITY; +} - if (reliable_allow_fb_enabled()) - return true; +/* + * return true means memory allocation need retry and flag ___GFP_RELIABILITY + * must be cleared. + */ +static inline bool check_after_alloc(gfp_t *gfp_mask, unsigned int order, + int preferred_nid, nodemask_t *nodemask, + struct page **_page) +{ + if (!mem_reliable_is_enabled()) + return false; + if (!(*gfp_mask & ___GFP_RELIABILITY)) return false; - } - /* - * Init tasks will alloc memory from non-mirrored region if their - * allocation trigger task_reliable_limit - */ - if (is_global_init(current)) { - if (!mem_reliable_counter_initialized()) { - *gfp_mask |= ___GFP_RELIABILITY; - return true; - } + if (!*_page) + goto out_retry; - if (reliable_mem_limit_check(1 << order) && - mem_reliable_watermark_ok(1 << order)) - *gfp_mask |= ___GFP_RELIABILITY; - return true; - } + if (*gfp_mask & __GFP_NOFAIL) + goto out; - /* - * This only check task_reliable_limit without ___GFP_RELIABILITY - * or this process is global init. - * For kernel internal mechanism(hugepaged collapse and others) - * If they alloc memory for user and obey task_reliable_limit, they - * need to check this limit before allocing pages. - */ - if ((current->flags & PF_RELIABLE) && (gfp_ori & __GFP_HIGHMEM) && - (gfp_ori & __GFP_MOVABLE)) { - if (reliable_mem_limit_check(1 << order) && - mem_reliable_watermark_ok(1 << order)) { - *gfp_mask |= ___GFP_RELIABILITY; - return true; - } + /* check water mark, reserver mirrored mem for kernel */ + if (!mem_reliable_watermark_ok(1 << order)) + goto out_free_page; - if (reliable_allow_fb_enabled()) - return true; + /* percpu counter is not initialized, ignore limit check */ + if (!mem_reliable_counter_initialized()) + goto out; - return false; + /* spcial user task, systemd is limited by task_reliable_limit */ + if (((current->flags & PF_RELIABLE) || is_global_init(current)) && + !reliable_mem_limit_check(1 << order)) + goto out_free_page; + + goto out; + +out_free_page: + __free_pages(*_page, order); + *_page = NULL; + +out_retry: + if (reliable_allow_fb_enabled() || is_global_init(current)) { + *gfp_mask &= ~___GFP_RELIABILITY; + return true; } - return true; + if (*gfp_mask & (__GFP_NORETRY | __GFP_RETRY_MAYFAIL | __GFP_THISNODE)) + goto out; + + /* Coredumps can quickly deplete all memory reserves */ + if (current->flags & PF_DUMPCORE) + goto out; + /* The OOM killer will not help higher order allocs */ + if (order > PAGE_ALLOC_COSTLY_ORDER) + goto out; + + /* oom here */ + mem_reliable_out_of_memory(*gfp_mask, order, preferred_nid, + nodemask); +out: + return false; } /* @@ -4729,12 +4687,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, return NULL; } - if (!prepare_before_alloc(&gfp_mask, order)) { - mem_reliable_out_of_memory(gfp_mask, order, preferred_nid, - nodemask); - goto out; - } + prepare_before_alloc(&gfp_mask); +retry: alloc_mask = gfp_mask; if (!prepare_alloc_pages(gfp_mask, order, preferred_nid, nodemask, &ac, &alloc_mask, &alloc_flags)) return NULL; @@ -4771,6 +4726,9 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int 
order, int preferred_nid, page = NULL; } + if (check_after_alloc(&gfp_mask, order, preferred_nid, nodemask, &page)) + goto retry; + trace_mm_page_alloc(page, order, alloc_mask, ac.migratetype); return page; -- Gitee From a30ba5b645cf928245fa7db7705afda57b3b3d72 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Wed, 30 Mar 2022 03:33:03 +0000 Subject: [PATCH 104/340] mm: Disable watermark check if reliable fallback is disabled hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- If memory reliable watermark is far greater than zone's default watermark, reliable memory allocation will trigger oom without wake up kswapd to release pagecache. With this patch, memory reliable watermark will be disabled if memory fallback is disabled. Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- include/linux/mem_reliable.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index c4f954340ea7..8340a24fe76d 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -124,6 +124,9 @@ static inline bool mem_reliable_watermark_ok(int nr_page) long sum = 0; int cpu; + if (!reliable_allow_fb_enabled()) + return true; + for_each_possible_cpu(cpu) sum += per_cpu(nr_reliable_buddy_pages, cpu); -- Gitee From 368d710d7f327b6512cbc90846b33cbf9934a579 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Wed, 30 Mar 2022 03:33:04 +0000 Subject: [PATCH 105/340] mm: Fallback to non-mirrored region below low watermark hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- Commit 45bd608ef89e ("mm: Introduce watermark check for memory reliable") introduce watermark to reserve memory in mirrored region for kernel usage. But this value does not interact well with kswapd and this will lead to fallback to non-mirrored region without release pagecache in mirrored region. With this patch, watermark is set to zero to avoid this problem. Memory allocation with ___GFP_RELIABILITY will fallback to non-mirrored region if zone's low watermark is reached and kswapd will be awakened at this time. 
Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/mem_reliable.c | 2 +- mm/page_alloc.c | 25 +++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 5505577d3784..6d4ab4bee3d5 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -11,7 +11,7 @@ #include #include -#define MEM_RELIABLE_RESERVE_MIN (256UL << 20) +#define MEM_RELIABLE_RESERVE_MIN 0 enum mem_reliable_types { MEM_RELIABLE_ALL, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fa1cdeba5293..9d4f75235420 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4271,6 +4271,29 @@ check_retry_cpuset(int cpuset_mems_cookie, struct alloc_context *ac) return false; } +#ifdef CONFIG_MEMORY_RELIABLE +static inline void mem_reliable_fallback_slowpath(gfp_t gfp_mask, + struct alloc_context *ac) +{ + if (!reliable_allow_fb_enabled()) + return; + + if (gfp_mask & __GFP_NOFAIL) + return; + + if ((ac->high_zoneidx == ZONE_NORMAL) && + (gfp_mask & ___GFP_RELIABILITY)) { + ac->high_zoneidx = gfp_zone(gfp_mask & ~___GFP_RELIABILITY); + ac->preferred_zoneref = first_zones_zonelist( + ac->zonelist, ac->high_zoneidx, ac->nodemask); + return; + } +} +#else +static inline void mem_reliable_fallback_slowpath(gfp_t gfp_mask, + struct alloc_context *ac) {} +#endif + static inline struct page * __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) @@ -4322,6 +4345,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, if (gfp_mask & __GFP_KSWAPD_RECLAIM) wake_all_kswapds(order, gfp_mask, ac); + mem_reliable_fallback_slowpath(gfp_mask, ac); + /* * The adjusted alloc_flags might result in immediate success, so try * that first -- Gitee From 93dfedd141b0d49150ce32ad88ca93fc1f614f47 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Sat, 2 Apr 2022 07:03:41 +0000 Subject: [PATCH 106/340] ima: Don't ignore errors from crypto_shash_update() stable inclusion from stable-v4.19.153 commit c470dc530c9ee6ef4b22fed19c77e20c745564e1 category: bugfix bugzilla: 83782, https://gitee.com/openeuler/kernel/issues/I5047U CVE: NA ----------------------------------------------------------------- commit 60386b854008adc951c470067f90a2d85b5d520f upstream. Errors returned by crypto_shash_update() are not checked in ima_calc_boot_aggregate_tfm() and thus can be overwritten at the next iteration of the loop. This patch adds a check after calling crypto_shash_update() and returns immediately if the result is not zero. 
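Condensed, the loop in ima_calc_boot_aggregate_tfm() ends up with the shape below; this is illustrative only, and the TPM_PCR0/TPM_PCR8 loop bounds are quoted from memory of the 4.19 IMA code rather than from this patch:

	for (i = TPM_PCR0; i < TPM_PCR8; i++) {
		ima_pcrread(i, pcr_i);
		rc = crypto_shash_update(shash, pcr_i, TPM_DIGEST_SIZE);
		if (rc != 0)		/* without this check, a later	   */
			return rc;	/* iteration overwrites the error  */
	}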
Cc: stable@vger.kernel.org Fixes: 3323eec921efd ("integrity: IMA as an integrity service provider") Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Wang Weiyang Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- security/integrity/ima/ima_crypto.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index d9f4b4c84519..1289757e73ea 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -683,6 +683,8 @@ static int __init ima_calc_boot_aggregate_tfm(char *digest, ima_pcrread(i, pcr_i); /* now accumulate with current aggregate */ rc = crypto_shash_update(shash, pcr_i, TPM_DIGEST_SIZE); + if (rc != 0) + return rc; } if (!rc) crypto_shash_final(shash, digest); -- Gitee From 192b51c29805e6e4921a698a828225d518cc27c0 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Sat, 2 Apr 2022 07:03:42 +0000 Subject: [PATCH 107/340] evm: Check size of security.evm before using it stable inclusion from stable-v4.19.155 commit 05f703b07727c0eb81f487143b583d5f8561d900 category: bugfix bugzilla: 83797, https://gitee.com/openeuler/kernel/issues/I5047U CVE: NA ----------------------------------------------------------------- commit 455b6c9112eff8d249e32ba165742085678a80a4 upstream. This patch checks the size for the EVM_IMA_XATTR_DIGSIG and EVM_XATTR_PORTABLE_DIGSIG types to ensure that the algorithm is read from the buffer returned by vfs_getxattr_alloc(). Cc: stable@vger.kernel.org # 4.19.x Fixes: 5feeb61183dde ("evm: Allow non-SHA1 digital signatures") Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Wang Weiyang Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- security/integrity/evm/evm_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/security/integrity/evm/evm_main.c b/security/integrity/evm/evm_main.c index c0b08ac32dcf..47f878b13990 100644 --- a/security/integrity/evm/evm_main.c +++ b/security/integrity/evm/evm_main.c @@ -190,6 +190,12 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry, break; case EVM_IMA_XATTR_DIGSIG: case EVM_XATTR_PORTABLE_DIGSIG: + /* accept xattr with non-empty signature field */ + if (xattr_len <= sizeof(struct signature_v2_hdr)) { + evm_status = INTEGRITY_FAIL; + goto out; + } + hdr = (struct signature_v2_hdr *)xattr_data; digest.hdr.algo = hdr->hash_algo; rc = evm_calc_hash(dentry, xattr_name, xattr_value, -- Gitee From b7a7cb07f8f7a9b25a73a4e5c03f46cd5a29de6d Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Sat, 2 Apr 2022 07:03:43 +0000 Subject: [PATCH 108/340] ima: Call ima_calc_boot_aggregate() in ima_eventdigest_init() stable inclusion from stable-v4.19.129 commit fcb067cb457e2326c6d759e346f5f5dfef351d50 category: bugfix bugzilla: 89622, https://gitee.com/openeuler/kernel/issues/I5047U CVE: NA ----------------------------------------------------------------- [ Upstream commit 6cc7c266e5b47d3cd2b5bb7fd3aac4e6bb2dd1d2 ] If the template field 'd' is chosen and the digest to be added to the measurement entry was not calculated with SHA1 or MD5, it is recalculated with SHA1, by using the passed file descriptor. However, this cannot be done for boot_aggregate, because there is no file descriptor. This patch adds a call to ima_calc_boot_aggregate() in ima_eventdigest_init(), so that the digest can be recalculated also for the boot_aggregate entry. 
Cc: stable@vger.kernel.org # 3.13.x Fixes: 3ce1217d6cd5d ("ima: define template fields library and new helpers") Reported-by: Takashi Iwai Signed-off-by: Roberto Sassu Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin Conflicts: security/integrity/ima/ima_crypto.c Signed-off-by: Wang Weiyang Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- security/integrity/ima/ima.h | 3 ++- security/integrity/ima/ima_crypto.c | 6 +++--- security/integrity/ima/ima_init.c | 2 +- security/integrity/ima/ima_template_lib.c | 18 ++++++++++++++++++ 4 files changed, 24 insertions(+), 5 deletions(-) diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 8d58874a8caa..2619dee3b4a5 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -56,6 +56,7 @@ extern int ima_policy_flag; extern int ima_hash_algo; extern int ima_appraise; extern struct tpm_chip *ima_tpm_chip; +extern const char boot_aggregate_name[]; /* IMA event related data */ struct ima_event_data { @@ -139,7 +140,7 @@ int ima_calc_buffer_hash(const void *buf, loff_t len, int ima_calc_field_array_hash(struct ima_field_data *field_data, struct ima_template_desc *desc, int num_fields, struct ima_digest_data *hash); -int __init ima_calc_boot_aggregate(struct ima_digest_data *hash); +int ima_calc_boot_aggregate(struct ima_digest_data *hash); void ima_add_violation(struct file *file, const unsigned char *filename, struct integrity_iint_cache *iint, const char *op, const char *cause); diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 1289757e73ea..0b2cd7cc9b14 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -663,8 +663,8 @@ static void __init ima_pcrread(int idx, u8 *pcr) /* * Calculate the boot aggregate hash */ -static int __init ima_calc_boot_aggregate_tfm(char *digest, - struct crypto_shash *tfm) +static int ima_calc_boot_aggregate_tfm(char *digest, + struct crypto_shash *tfm) { /*to be compatible with tpm1.2 ,make sure the content is not arbitrary*/ u8 pcr_i[IMA_DIGEST_SIZE] = {0}; @@ -691,7 +691,7 @@ static int __init ima_calc_boot_aggregate_tfm(char *digest, return rc; } -int __init ima_calc_boot_aggregate(struct ima_digest_data *hash) +int ima_calc_boot_aggregate(struct ima_digest_data *hash) { struct crypto_shash *tfm; int rc; diff --git a/security/integrity/ima/ima_init.c b/security/integrity/ima/ima_init.c index faac9ecaa0ae..a2bc4cb4482a 100644 --- a/security/integrity/ima/ima_init.c +++ b/security/integrity/ima/ima_init.c @@ -25,7 +25,7 @@ #include "ima.h" /* name for boot aggregate entry */ -static const char *boot_aggregate_name = "boot_aggregate"; +const char boot_aggregate_name[] = "boot_aggregate"; struct tpm_chip *ima_tpm_chip; /* Add the boot aggregate to the IMA measurement list and extend diff --git a/security/integrity/ima/ima_template_lib.c b/security/integrity/ima/ima_template_lib.c index 43752002c222..48c5a1be88ac 100644 --- a/security/integrity/ima/ima_template_lib.c +++ b/security/integrity/ima/ima_template_lib.c @@ -284,6 +284,24 @@ int ima_eventdigest_init(struct ima_event_data *event_data, goto out; } + if ((const char *)event_data->filename == boot_aggregate_name) { + if (ima_tpm_chip) { + hash.hdr.algo = HASH_ALGO_SHA1; + result = ima_calc_boot_aggregate(&hash.hdr); + + /* algo can change depending on available PCR banks */ + if (!result && hash.hdr.algo != HASH_ALGO_SHA1) + result = -EINVAL; + + if (result < 0) + memset(&hash, 0, sizeof(hash)); + } + + cur_digest = 
hash.hdr.digest; + cur_digestsize = hash_digest_size[HASH_ALGO_SHA1]; + goto out; + } + if (!event_data->file) /* missing info to re-calculate the digest */ return -EINVAL; -- Gitee From d073934a1ceae819057c8e1a98d384cb506b1806 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Sat, 2 Apr 2022 07:03:44 +0000 Subject: [PATCH 109/340] ima: Remove __init annotation from ima_pcrread() stable inclusion from stable-v4.19.169 commit de581e41716795ce93506f3e5b0200048aa4439c category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5047U CVE: NA ----------------------------------------------------------------- commit 8b8c704d913b0fe490af370631a4200e26334ec0 upstream. Commit 6cc7c266e5b4 ("ima: Call ima_calc_boot_aggregate() in ima_eventdigest_init()") added a call to ima_calc_boot_aggregate() so that the digest can be recalculated for the boot_aggregate measurement entry if the 'd' template field has been requested. For the 'd' field, only SHA1 and MD5 digests are accepted. Given that ima_eventdigest_init() does not have the __init annotation, all functions called should not have it. This patch removes __init from ima_pcrread(). Cc: stable@vger.kernel.org Fixes: 6cc7c266e5b4 ("ima: Call ima_calc_boot_aggregate() in ima_eventdigest_init()") Reported-by: Linus Torvalds Signed-off-by: Roberto Sassu Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Wang Weiyang Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- security/integrity/ima/ima_crypto.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 0b2cd7cc9b14..9f969dc8654d 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -651,7 +651,7 @@ int ima_calc_buffer_hash(const void *buf, loff_t len, return calc_buffer_shash(buf, len, hash); } -static void __init ima_pcrread(int idx, u8 *pcr) +static void ima_pcrread(int idx, u8 *pcr) { if (!ima_tpm_chip) return; -- Gitee From 458e2478b8a6afeb6ea9cf6587ae2db89412f5f0 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Sat, 2 Apr 2022 07:03:45 +0000 Subject: [PATCH 110/340] ima: Set file->f_mode instead of file->f_flags in ima_calc_file_hash() stable inclusion from stable-v4.19.125 commit 904de138bae903a6e377322e64312e22a84f2140 category: bugfix bugzilla: 91657, https://gitee.com/openeuler/kernel/issues/I5047U CVE: NA ----------------------------------------------------------------- [ Upstream commit 0014cc04e8ec077dc482f00c87dfd949cfe2b98f ] Commit a408e4a86b36 ("ima: open a new file instance if no read permissions") tries to create a new file descriptor to calculate a file digest if the file has not been opened with O_RDONLY flag. However, if a new file descriptor cannot be obtained, it sets the FMODE_READ flag to file->f_flags instead of file->f_mode. This patch fixes this issue by replacing f_flags with f_mode as it was before that commit. 
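The mixup matters because f_flags and f_mode use different bit namespaces, so setting FMODE_READ in f_flags neither grants read capability nor touches the field the VFS actually checks. A short sketch, assuming the conventional Linux values of these constants:

	/*
	 * Sketch only -- relies on the usual definitions:
	 *   f_flags carries O_* open flags  (O_RDONLY 0, O_WRONLY 1, O_RDWR 2, ...)
	 *   f_mode  carries FMODE_* bits    (FMODE_READ 0x1, FMODE_WRITE 0x2, ...)
	 *
	 * "f->f_flags |= FMODE_READ" only sets bit 0 of f_flags, while the VFS
	 * read checks look at FMODE_READ in f_mode, which stays clear.  Hence:
	 */
	f->f_mode |= FMODE_READ;	/* what the fix restores */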
Cc: stable@vger.kernel.org # 4.20.x Fixes: a408e4a86b36 ("ima: open a new file instance if no read permissions") Signed-off-by: Roberto Sassu Reviewed-by: Goldwyn Rodrigues Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin Signed-off-by: Wang Weiyang Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- security/integrity/ima/ima_crypto.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 9f969dc8654d..072a95ddc8b5 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -415,7 +415,7 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) loff_t i_size; int rc; struct file *f = file; - bool new_file_instance = false, modified_flags = false; + bool new_file_instance = false, modified_mode = false; /* * For consistency, fail file's opened with the O_DIRECT flag on @@ -435,13 +435,13 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) f = dentry_open(&file->f_path, flags, file->f_cred); if (IS_ERR(f)) { /* - * Cannot open the file again, lets modify f_flags + * Cannot open the file again, lets modify f_mode * of original and continue */ pr_info_ratelimited("Unable to reopen file for reading.\n"); f = file; - f->f_flags |= FMODE_READ; - modified_flags = true; + f->f_mode |= FMODE_READ; + modified_mode = true; } else { new_file_instance = true; } @@ -459,8 +459,8 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) out: if (new_file_instance) fput(f); - else if (modified_flags) - f->f_flags &= ~FMODE_READ; + else if (modified_mode) + f->f_mode &= ~FMODE_READ; return rc; } -- Gitee From d342874100782784237d51154f193a934e88c729 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Sat, 2 Apr 2022 07:03:46 +0000 Subject: [PATCH 111/340] ima: Don't modify file descriptor mode on the fly stable inclusion from stable-v4.19.164 commit 709ed96f6ef2b040a0ea9274b4d67ce61a095eb3 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5047U CVE: NA ----------------------------------------------------------------- commit 207cdd565dfc95a0a5185263a567817b7ebf5467 upstream. Commit a408e4a86b36b ("ima: open a new file instance if no read permissions") already introduced a second open to measure a file when the original file descriptor does not allow it. However, it didn't remove the existing method of changing the mode of the original file descriptor, which is still necessary if the current process does not have enough privileges to open a new one. Changing the mode isn't really an option, as the filesystem might need to do preliminary steps to make the read possible. Thus, this patch removes the code and keeps the second open as the only option to measure a file when it is unreadable with the original file descriptor. 
Cc: # 4.20.x: 0014cc04e8ec0 ima: Set file->f_mode Fixes: 2fe5d6def1672 ("ima: integrity appraisal extension") Signed-off-by: Roberto Sassu Reviewed-by: Christoph Hellwig Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Wang Weiyang Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- security/integrity/ima/ima_crypto.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/security/integrity/ima/ima_crypto.c b/security/integrity/ima/ima_crypto.c index 072a95ddc8b5..cda1697c8fda 100644 --- a/security/integrity/ima/ima_crypto.c +++ b/security/integrity/ima/ima_crypto.c @@ -415,7 +415,7 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) loff_t i_size; int rc; struct file *f = file; - bool new_file_instance = false, modified_mode = false; + bool new_file_instance = false; /* * For consistency, fail file's opened with the O_DIRECT flag on @@ -433,18 +433,10 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) O_TRUNC | O_CREAT | O_NOCTTY | O_EXCL); flags |= O_RDONLY; f = dentry_open(&file->f_path, flags, file->f_cred); - if (IS_ERR(f)) { - /* - * Cannot open the file again, lets modify f_mode - * of original and continue - */ - pr_info_ratelimited("Unable to reopen file for reading.\n"); - f = file; - f->f_mode |= FMODE_READ; - modified_mode = true; - } else { - new_file_instance = true; - } + if (IS_ERR(f)) + return PTR_ERR(f); + + new_file_instance = true; } i_size = i_size_read(file_inode(f)); @@ -459,8 +451,6 @@ int ima_calc_file_hash(struct file *file, struct ima_digest_data *hash) out: if (new_file_instance) fput(f); - else if (modified_mode) - f->f_mode &= ~FMODE_READ; return rc; } -- Gitee From 50cea8d3bee9c0b82409c216fb945a4c0e05e5a9 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Sat, 2 Apr 2022 07:03:47 +0000 Subject: [PATCH 112/340] ima: Fix return value of ima_write_policy() stable inclusion from stable-v4.19.125 commit 657a03ff6c97319d8e664c05a1beebd8eb15d049 category: bugfix bugzilla: 91661, https://gitee.com/openeuler/kernel/issues/I5047U CVE: NA ----------------------------------------------------------------- [ Upstream commit 2e3a34e9f409ebe83d1af7cd2f49fca7af97dfac ] This patch fixes the return value of ima_write_policy() when a new policy is directly passed to IMA and the current policy requires appraisal of the file containing the policy. Currently, if appraisal is not in ENFORCE mode, ima_write_policy() returns 0 and leads user space applications to an endless loop. Fix this issue by denying the operation regardless of the appraisal mode. 
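The "endless loop" arises from ordinary short-write handling in user space: a caller that advances by write()'s return value makes no progress when the kernel reports success but consumes 0 bytes. A hypothetical sketch of such a caller (the function name and structure are illustrative, not taken from any particular tool):

#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int write_policy(int fd, const char *rule)
{
	size_t done = 0, len = strlen(rule);

	while (done < len) {
		ssize_t n = write(fd, rule + done, len - done);

		if (n < 0) {
			perror("write");	/* e.g. EACCES after this fix */
			return -1;
		}
		done += n;			/* n == 0 -> loop never ends */
	}
	return 0;
}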
Cc: stable@vger.kernel.org # 4.10.x Fixes: 19f8a84713edc ("ima: measure and appraise the IMA policy itself") Signed-off-by: Roberto Sassu Reviewed-by: Krzysztof Struczynski Signed-off-by: Mimi Zohar Signed-off-by: Sasha Levin Signed-off-by: Wang Weiyang Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- security/integrity/ima/ima_fs.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index cfb8cc3b975e..604cdac63d84 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -343,8 +343,7 @@ static ssize_t ima_write_policy(struct file *file, const char __user *buf, integrity_audit_msg(AUDIT_INTEGRITY_STATUS, NULL, NULL, "policy_update", "signed policy required", 1, 0); - if (ima_appraise & IMA_APPRAISE_ENFORCE) - result = -EACCES; + result = -EACCES; } else { result = ima_parse_add_rule(data); } -- Gitee From 2126576ff274665fa5b98bff56e7da1e0a6335cc Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Sat, 2 Apr 2022 07:03:48 +0000 Subject: [PATCH 113/340] sr9700: sanity check for packet length mainline inclusion from mainline-v5.17-rc4 commit e9da0b56fe27206b49f39805f7dcda8a89379062 category: bugfix bugzilla: 186472, https://gitee.com/openeuler/kernel/issues/I4ZWKH CVE: CVE-2022-26966 -------------------------------- A malicious device can leak heap data to user space providing bogus frame lengths. Introduce a sanity check. Signed-off-by: Oliver Neukum Reviewed-by: Grant Grundler Signed-off-by: David S. Miller Signed-off-by: Lijun Fang Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Yongqiang Liu --- drivers/net/usb/sr9700.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/usb/sr9700.c b/drivers/net/usb/sr9700.c index 6ac232e52bf7..83640628c47d 100644 --- a/drivers/net/usb/sr9700.c +++ b/drivers/net/usb/sr9700.c @@ -410,7 +410,7 @@ static int sr9700_rx_fixup(struct usbnet *dev, struct sk_buff *skb) /* ignore the CRC length */ len = (skb->data[1] | (skb->data[2] << 8)) - 4; - if (len > ETH_FRAME_LEN) + if (len > ETH_FRAME_LEN || len > skb->len) return 0; /* the last packet of current skb */ -- Gitee From a91080fd1fe919a42e817866a08f4a30bbf04238 Mon Sep 17 00:00:00 2001 From: Szymon Heidrich Date: Sat, 2 Apr 2022 07:03:49 +0000 Subject: [PATCH 114/340] USB: gadget: validate endpoint index for xilinx udc mainline inclusion from mainline-v5.17-rc4 commit 7f14c7227f342d9932f9b918893c8814f86d2a0d category: bugfix bugzilla: 186427, https://gitee.com/openeuler/kernel/issues/I4ZWQA CVE: CVE-2022-27223 -------------------------------- Assure that host may not manipulate the index to point past endpoint array. 
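Note that the existing masking is not itself a bound: USB_ENDPOINT_NUMBER_MASK only keeps the endpoint-number bits of wIndex (0..15), while the driver's ep[] array can be smaller, so an explicit compare against the array size is still needed. A sketch of the added check (XUSB_MAX_ENDPOINTS is the driver's own array bound; its value is not shown here):

	epnum = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK;	/* host-controlled, 0..15 */
	if (epnum >= XUSB_MAX_ENDPOINTS)	/* size of udc->ep[] */
		goto stall;			/* reject rather than index out of bounds */
	target_ep = &udc->ep[epnum];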
Signed-off-by: Szymon Heidrich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lijun Fang Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Yongqiang Liu --- drivers/usb/gadget/udc/udc-xilinx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/usb/gadget/udc/udc-xilinx.c b/drivers/usb/gadget/udc/udc-xilinx.c index 6407e433bc78..72f1bc6a680e 100644 --- a/drivers/usb/gadget/udc/udc-xilinx.c +++ b/drivers/usb/gadget/udc/udc-xilinx.c @@ -1613,6 +1613,8 @@ static void xudc_getstatus(struct xusb_udc *udc) break; case USB_RECIP_ENDPOINT: epnum = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK; + if (epnum >= XUSB_MAX_ENDPOINTS) + goto stall; target_ep = &udc->ep[epnum]; epcfgreg = udc->read_fn(udc->addr + target_ep->offset); halt = epcfgreg & XUSB_EP_CFG_STALL_MASK; @@ -1680,6 +1682,10 @@ static void xudc_set_clear_feature(struct xusb_udc *udc) case USB_RECIP_ENDPOINT: if (!udc->setup.wValue) { endpoint = udc->setup.wIndex & USB_ENDPOINT_NUMBER_MASK; + if (endpoint >= XUSB_MAX_ENDPOINTS) { + xudc_ep0_stall(udc); + return; + } target_ep = &udc->ep[endpoint]; outinbit = udc->setup.wIndex & USB_ENDPOINT_DIR_MASK; outinbit = outinbit >> 7; -- Gitee From a5e62d73dee762e486e26df79fe05df5c2e843c6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 2 Apr 2022 07:03:50 +0000 Subject: [PATCH 115/340] Revert "swiotlb: rework "fix info leak with DMA_FROM_DEVICE"" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v5.18-rc1 commit bddac7c1e02ba47f0570e494c9289acea3062cc1 category: bugfix bugzilla: 186478, https://gitee.com/openeuler/kernel/issues/I4Z86P CVE: CVE-2022-0854 -------------------------------- This reverts commit aa6f8dcbab473f3a3c7454b74caa46d36cdc5d13. It turns out this breaks at least the ath9k wireless driver, and possibly others. What the ath9k driver does on packet receive is to set up the DMA transfer with: int ath_rx_init(..) .. bf->bf_buf_addr = dma_map_single(sc->dev, skb->data, common->rx_bufsize, DMA_FROM_DEVICE); and then the receive logic (through ath_rx_tasklet()) will fetch incoming packets static bool ath_edma_get_buffers(..) .. dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr, common->rx_bufsize, DMA_FROM_DEVICE); ret = ath9k_hw_process_rxdesc_edma(ah, rs, skb->data); if (ret == -EINPROGRESS) { /*let device gain the buffer again*/ dma_sync_single_for_device(sc->dev, bf->bf_buf_addr, common->rx_bufsize, DMA_FROM_DEVICE); return false; } and it's worth noting how that first DMA sync: dma_sync_single_for_cpu(..DMA_FROM_DEVICE); is there to make sure the CPU can read the DMA buffer (possibly by copying it from the bounce buffer area, or by doing some cache flush). The iommu correctly turns that into a "copy from bounce bufer" so that the driver can look at the state of the packets. In the meantime, the device may continue to write to the DMA buffer, but we at least have a snapshot of the state due to that first DMA sync. But that _second_ DMA sync: dma_sync_single_for_device(..DMA_FROM_DEVICE); is telling the DMA mapping that the CPU wasn't interested in the area because the packet wasn't there. In the case of a DMA bounce buffer, that is a no-op. Note how it's not a sync for the CPU (the "for_device()" part), and it's not a sync for data written by the CPU (the "DMA_FROM_DEVICE" part). Or rather, it _should_ be a no-op. 
That's what commit aa6f8dcbab47 broke: it made the code bounce the buffer unconditionally, and changed the DMA_FROM_DEVICE to just unconditionally and illogically be DMA_TO_DEVICE. [ Side note: purely within the confines of the swiotlb driver it wasn't entirely illogical: The reason it did that odd DMA_FROM_DEVICE -> DMA_TO_DEVICE conversion thing is because inside the swiotlb driver, it uses just a swiotlb_bounce() helper that doesn't care about the whole distinction of who the sync is for - only which direction to bounce. So it took the "sync for device" to mean that the CPU must have been the one writing, and thought it meant DMA_TO_DEVICE. ] Also note how the commentary in that commit was wrong, probably due to that whole confusion, claiming that the commit makes the swiotlb code "bounce unconditionally (that is, also when dir == DMA_TO_DEVICE) in order do avoid synchronising back stale data from the swiotlb buffer" which is nonsensical for two reasons: - that "also when dir == DMA_TO_DEVICE" is nonsensical, as that was exactly when it always did - and should do - the bounce. - since this is a sync for the device (not for the CPU), we're clearly fundamentally not coping back stale data from the bounce buffers at all, because we'd be copying *to* the bounce buffers. So that commit was just very confused. It confused the direction of the synchronization (to the device, not the cpu) with the direction of the DMA (from the device). Reported-and-bisected-by: Oleksandr Natalenko Reported-by: Olha Cherevyk Cc: Halil Pasic Cc: Christoph Hellwig Cc: Kalle Valo Cc: Robin Murphy Cc: Toke Høiland-Jørgensen Cc: Maxime Bizon Cc: Johannes Berg Signed-off-by: Linus Torvalds Conflicts: Documentation/core-api/dma-attributes.rst include/linux/dma-mapping.h kernel/dma/swiotlb.c Signed-off-by: Liu Shixin Reviewed-by: Xiu Jianfeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- Documentation/DMA-attributes.txt | 8 ++++++++ include/linux/dma-mapping.h | 8 ++++++++ kernel/dma/swiotlb.c | 26 ++++++++++---------------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index 8f8d97f65d73..34742fc00ec0 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -156,3 +156,11 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). + +DMA_ATTR_OVERWRITE +------------------ + +This is a hint to the DMA-mapping subsystem that the device is expected to +overwrite the entire mapped size, thus the caller does not require any of the +previous buffer contents to be preserved. This allows bounce-buffering +implementations to optimise DMA_FROM_DEVICE transfers. diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 9d39d97e09f4..aa23cade6991 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -70,6 +70,14 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) +/* + * This is a hint to the DMA-mapping subsystem that the device is expected + * to overwrite the entire mapped size, thus the caller does not require any + * of the previous buffer contents to be preserved. This allows + * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. + */ +#define DMA_ATTR_OVERWRITE (1UL << 10) + /* * A dma_addr_t can hold any valid DMA or bus address for the platform. 
* It can be given to a device to use as a DMA source or target. A CPU cannot diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index b6eee8ea867b..1624d12b60af 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -583,14 +583,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, */ for (i = 0; i < nslots; i++) io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - /* - * When dir == DMA_FROM_DEVICE we could omit the copy from the orig - * to the tlb buffer, if we knew for sure the device will - * overwirte the entire current content. But we don't. Thus - * unconditional bounce may prevent leaking swiotlb content (i.e. - * kernel memory) to user-space. - */ - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); + if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && + (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE || + dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); + return tlb_addr; } @@ -682,14 +679,11 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, BUG_ON(dir != DMA_TO_DEVICE); break; case SYNC_FOR_DEVICE: - /* - * Unconditional bounce is necessary to avoid corruption on - * sync_*_for_cpu or dma_ummap_* when the device didn't overwrite - * the whole lengt of the bounce buffer. - */ - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_TO_DEVICE); - BUG_ON(!valid_dma_direction(dir)); + if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) + swiotlb_bounce(orig_addr, tlb_addr, + size, DMA_TO_DEVICE); + else + BUG_ON(dir != DMA_FROM_DEVICE); break; default: BUG(); -- Gitee From 3e10969090c6f2aab0f0136aaf59d315e12b2f31 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 2 Apr 2022 07:03:51 +0000 Subject: [PATCH 116/340] Reinstate some of "swiotlb: rework "fix info leak with DMA_FROM_DEVICE"" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit mainline inclusion from mainline-v5.18-rc1 commit 901c7280ca0d5e2b4a8929fbe0bfb007ac2a6544 category: bugfix bugzilla: 186478, https://gitee.com/openeuler/kernel/issues/I4Z86P CVE: CVE-2022-0854 -------------------------------- Halil Pasic points out [1] that the full revert of that commit (revert in bddac7c1e02b), and that a partial revert that only reverts the problematic case, but still keeps some of the cleanups is probably better.  And that partial revert [2] had already been verified by Oleksandr Natalenko to also fix the issue, I had just missed that in the long discussion. So let's reinstate the cleanups from commit aa6f8dcbab47 ("swiotlb: rework "fix info leak with DMA_FROM_DEVICE""), and effectively only revert the part that caused problems. 
Link: https://lore.kernel.org/all/20220328013731.017ae3e3.pasic@linux.ibm.com/ [1] Link: https://lore.kernel.org/all/20220324055732.GB12078@lst.de/ [2] Link: https://lore.kernel.org/all/4386660.LvFx2qVVIh@natalenko.name/ [3] Suggested-by: Halil Pasic Tested-by: Oleksandr Natalenko Cc: Christoph Hellwig" Signed-off-by: Linus Torvalds Conflicts: Documentation/core-api/dma-attributes.rst include/linux/dma-mapping.h kernel/dma/swiotlb.c Signed-off-by: Liu Shixin Reviewed-by: Xiu Jianfeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- Documentation/DMA-attributes.txt | 8 -------- include/linux/dma-mapping.h | 8 -------- kernel/dma/swiotlb.c | 13 ++++++++----- 3 files changed, 8 insertions(+), 21 deletions(-) diff --git a/Documentation/DMA-attributes.txt b/Documentation/DMA-attributes.txt index 34742fc00ec0..8f8d97f65d73 100644 --- a/Documentation/DMA-attributes.txt +++ b/Documentation/DMA-attributes.txt @@ -156,11 +156,3 @@ accesses to DMA buffers in both privileged "supervisor" and unprivileged subsystem that the buffer is fully accessible at the elevated privilege level (and ideally inaccessible or at least read-only at the lesser-privileged levels). - -DMA_ATTR_OVERWRITE ------------------- - -This is a hint to the DMA-mapping subsystem that the device is expected to -overwrite the entire mapped size, thus the caller does not require any of the -previous buffer contents to be preserved. This allows bounce-buffering -implementations to optimise DMA_FROM_DEVICE transfers. diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index aa23cade6991..9d39d97e09f4 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -70,14 +70,6 @@ */ #define DMA_ATTR_PRIVILEGED (1UL << 9) -/* - * This is a hint to the DMA-mapping subsystem that the device is expected - * to overwrite the entire mapped size, thus the caller does not require any - * of the previous buffer contents to be preserved. This allows - * bounce-buffering implementations to optimise DMA_FROM_DEVICE transfers. - */ -#define DMA_ATTR_OVERWRITE (1UL << 10) - /* * A dma_addr_t can hold any valid DMA or bus address for the platform. * It can be given to a device to use as a DMA source or target. A CPU cannot diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 1624d12b60af..694e7ccea659 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -583,11 +583,14 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, */ for (i = 0; i < nslots; i++) io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (!(attrs & DMA_ATTR_OVERWRITE) || dir == DMA_TO_DEVICE || - dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); - + /* + * When dir == DMA_FROM_DEVICE we could omit the copy from the orig + * to the tlb buffer, if we knew for sure the device will + * overwirte the entire current content. But we don't. Thus + * unconditional bounce may prevent leaking swiotlb content (i.e. + * kernel memory) to user-space. 
+ */ + swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); return tlb_addr; } -- Gitee From fb59563c5a1f53a87ba7f9f6a341156b6e2d2c76 Mon Sep 17 00:00:00 2001 From: Zhang Qiao Date: Sat, 2 Apr 2022 07:03:52 +0000 Subject: [PATCH 117/340] sched/fair: Add qos_throttle_list node in struct cfs_rq hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I50PPU CVE: NA ----------------------------------------------------------------- when unthrottle a cfs_rq at distribute_cfs_runtime(), another cpu may re-throttle this cfs_rq at qos_throttle_cfs_rq() before access the cfs_rq->throttle_list.next, but meanwhile, qos throttle will attach the cfs_rq throttle_list node to percpu qos_throttled_cfs_rq, it will change cfs_rq->throttle_list.next and cause panic or hardlockup at distribute_cfs_runtime(). Fix it by adding a qos_throttle_list node in struct cfs_rq, and qos throttle disuse the cfs_rq->throttle_list. Signed-off-by: Zhang Qiao Reviewed-by: zheng zucheng Reviewed-by: Chen Hui Signed-off-by: Yongqiang Liu --- kernel/sched/fair.c | 10 +++++++--- kernel/sched/sched.h | 5 +++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 52029f3a7439..4c7c0144a852 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -5059,6 +5059,9 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq) { cfs_rq->runtime_enabled = 0; INIT_LIST_HEAD(&cfs_rq->throttled_list); +#ifdef CONFIG_QOS_SCHED + INIT_LIST_HEAD(&cfs_rq->qos_throttled_list); +#endif } void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b) @@ -6925,7 +6928,8 @@ static void throttle_qos_cfs_rq(struct cfs_rq *cfs_rq) cfs_rq->throttled = 1; cfs_rq->throttled_clock = rq_clock(rq); - list_add(&cfs_rq->throttled_list, &per_cpu(qos_throttled_cfs_rq, cpu_of(rq))); + list_add(&cfs_rq->qos_throttled_list, + &per_cpu(qos_throttled_cfs_rq, cpu_of(rq))); } static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq) @@ -6943,7 +6947,7 @@ static void unthrottle_qos_cfs_rq(struct cfs_rq *cfs_rq) update_rq_clock(rq); cfs_b->throttled_time += rq_clock(rq) - cfs_rq->throttled_clock; - list_del_init(&cfs_rq->throttled_list); + list_del_init(&cfs_rq->qos_throttled_list); /* update hierarchical throttle state */ walk_tg_tree_from(cfs_rq->tg, tg_nop, tg_unthrottle_up, (void *)rq); @@ -6984,7 +6988,7 @@ static int __unthrottle_qos_cfs_rqs(int cpu) int res = 0; list_for_each_entry_safe(cfs_rq, tmp_rq, &per_cpu(qos_throttled_cfs_rq, cpu), - throttled_list) { + qos_throttled_list) { if (cfs_rq_throttled(cfs_rq)) { unthrottle_qos_cfs_rq(cfs_rq); res++; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 1aaff1aa89f6..ae3068153093 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -592,6 +592,11 @@ struct cfs_rq { #endif KABI_RESERVE(2) +#ifndef __GENKSYMS__ +#ifdef CONFIG_QOS_SCHED + struct list_head qos_throttled_list; +#endif +#endif }; static inline int rt_bandwidth_enabled(void) -- Gitee From 5597897f03c0077f24712c33c579beb15986b1fe Mon Sep 17 00:00:00 2001 From: Serge Semin Date: Sat, 2 Apr 2022 07:49:18 +0000 Subject: [PATCH 118/340] serial: 8250: Fix max baud limit in generic 8250 port stable inclusion from linux-4.19.130 commit 0eeaf62981ecc79e8395ca8caa1570eaf3a12257 category: bugfix bugzilla: 89545 CVE: N/A ------------------------------------------------ [ Upstream commit 7b668c064ec33f3d687c3a413d05e355172e6c92 ] Standard 8250 UART ports are designed in a way so they can communicate with baud rates up to 1/16 of a reference frequency. 
It's expected from most of the currently supported UART controllers. That's why the former version of serial8250_get_baud_rate() method called uart_get_baud_rate() with min and max baud rates passed as (port->uartclk / 16 / UART_DIV_MAX) and ((port->uartclk + tolerance) / 16) respectively. Doing otherwise, like it was suggested in commit ("serial: 8250_mtk: support big baud rate."), caused acceptance of bauds, which was higher than the normal UART controllers actually supported. As a result if some user-space program requested to set a baud greater than (uartclk / 16) it would have been permitted without truncation, but then serial8250_get_divisor(baud) (which calls uart_get_divisor() to get the reference clock divisor) would have returned a zero divisor. Setting zero divisor will cause an unpredictable effect varying from chip to chip. In case of DW APB UART the communications just stop. Lets fix this problem by getting back the limitation of (uartclk + tolerance) / 16 maximum baud supported by the generic 8250 port. Mediatek 8250 UART ports driver developer shouldn't have touched it in the first place notably seeing he already provided a custom version of set_termios() callback in that glue-driver which took into account the extended baud rate values and accordingly updated the standard and vendor-specific divisor latch registers anyway. Fixes: 81bb549fdf14 ("serial: 8250_mtk: support big baud rate.") Signed-off-by: Serge Semin Cc: Alexey Malahov Cc: Thomas Bogendoerfer Cc: Paul Burton Cc: Ralf Baechle Cc: Arnd Bergmann Cc: Long Cheng Cc: Andy Shevchenko Cc: Maxime Ripard Cc: Catalin Marinas Cc: Will Deacon Cc: Russell King Cc: linux-mips@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-mediatek@lists.infradead.org Link: https://lore.kernel.org/r/20200506233136.11842-2-Sergey.Semin@baikalelectronics.ru Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Yi Yang Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/tty/serial/8250/8250_port.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_port.c b/drivers/tty/serial/8250/8250_port.c index 1867c2546cd9..3a0604b025e9 100644 --- a/drivers/tty/serial/8250/8250_port.c +++ b/drivers/tty/serial/8250/8250_port.c @@ -2647,6 +2647,8 @@ static unsigned int serial8250_get_baud_rate(struct uart_port *port, struct ktermios *termios, struct ktermios *old) { + unsigned int tolerance = port->uartclk / 100; + /* * Ask the core to calculate the divisor for us. * Allow 1% tolerance at the upper limit so uart clks marginally @@ -2655,7 +2657,7 @@ static unsigned int serial8250_get_baud_rate(struct uart_port *port, */ return uart_get_baud_rate(port, termios, old, port->uartclk / 16 / UART_DIV_MAX, - port->uartclk); + (port->uartclk + tolerance) / 16); } void -- Gitee From df45decfa77ac71641a2324bb08196d164f1e8ee Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Mon, 11 Apr 2022 10:35:37 +0800 Subject: [PATCH 119/340] printk: Convert a use of sprintf to snprintf in console_unlock mainline inclusion from mainline-v5.8-rc1 commit 5661dd95a2958634485bb1a53f90a6ab621d4b0c category: bugfix bugzilla: 91291 CVE: N/A -------------------------------- When CONFIG_PRINTK is disabled (e.g. when building allnoconfig), clang warns: ../kernel/printk/printk.c:2416:10: warning: 'sprintf' will always overflow; destination buffer has size 0, but format string expands to at least 33 [-Wfortify-source] len = sprintf(text, ^ 1 warning generated. 
It is not wrong; text has a zero size when CONFIG_PRINTK is disabled because LOG_LINE_MAX and PREFIX_MAX are both zero. Change to snprintf so that this case is explicitly handled without any risk of overflow. Link: https://github.com/ClangBuiltLinux/linux/issues/846 Link: https://github.com/llvm/llvm-project/commit/6d485ff455ea2b37fef9e06e426dae6c1241b231 Link: http://lkml.kernel.org/r/20200130221644.2273-1-natechancellor@gmail.com Cc: Steven Rostedt Cc: linux-kernel@vger.kernel.org Cc: clang-built-linux@googlegroups.com Signed-off-by: Nathan Chancellor Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek Signed-off-by: Yi Yang Reviewed-by: Xiu Jianfeng Signed-off-by: Laibin Qiu --- kernel/printk/printk.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 0fe45941b5c7..c645a7221d0b 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -2423,9 +2423,9 @@ void console_unlock(void) printk_safe_enter_irqsave(flags); raw_spin_lock(&logbuf_lock); if (console_seq < log_first_seq) { - len = sprintf(text, - "** %llu printk messages dropped **\n", - log_first_seq - console_seq); + len = snprintf(text, sizeof(text), + "** %llu printk messages dropped **\n", + log_first_seq - console_seq); /* messages are gone, move to first one */ console_seq = log_first_seq; -- Gitee From 39a38fe3a705b89c418f63d6730ad5c65876ff93 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 11 Apr 2022 10:35:38 +0800 Subject: [PATCH 120/340] tty: don't crash in tty_init_dev when missing tty_port mainline inclusion from mainline-v5.5-rc1 commit 2ae0b31e0faced43c011ce3221f2535721cb6a66 category: bugfix bugzilla: 82118 CVE: N/A -------------------------------- We currently warn the user when tty->port is not set in tty_init_dev yet. The warning says that the kernel will crash later. And it really will only few lines below at: tty->port->itty = tty; So be nice and avoid the crash -- return an error instead. And update the warning. Signed-off-by: Jiri Slaby Cc: Sudip Mukherjee Link: https://lore.kernel.org/r/20191122101721.7222-1-jslaby@suse.cz Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yi Yang Reviewed-by: Xiu Jianfeng Signed-off-by: Laibin Qiu --- drivers/tty/tty_io.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 3025c39ba6b1..dc522e994e80 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1343,9 +1343,12 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx) if (!tty->port) tty->port = driver->ports[idx]; - WARN_RATELIMIT(!tty->port, - "%s: %s driver does not set tty->port. This will crash the kernel later. Fix the driver!\n", - __func__, tty->driver->name); + if (WARN_RATELIMIT(!tty->port, + "%s: %s driver does not set tty->port. This would crash the kernel. 
Fix the driver!\n", + __func__, tty->driver->name)) { + retval = -EINVAL; + goto err_release_lock; + } retval = tty_ldisc_lock(tty, 5 * HZ); if (retval) -- Gitee From 5485fe7b1bb5be0515a0177fb26eb71d14ec3941 Mon Sep 17 00:00:00 2001 From: Matthias Reichl Date: Mon, 11 Apr 2022 10:35:39 +0800 Subject: [PATCH 121/340] tty: fix crash in release_tty if tty->port is not set mainline inclusion from mainline-v5.10-rc3 commit 4466d6d2f80c1193e0845d110277c56da77a6418 category: bugfix bugzilla: 82118 CVE: N/A -------------------------------- Commit 2ae0b31e0face ("tty: don't crash in tty_init_dev when missing tty_port") didn't fully prevent the crash as the cleanup path in tty_init_dev() calls release_tty() which dereferences tty->port without checking it for non-null. Add tty->port checks to release_tty to avoid the kernel crash. Fixes: 2ae0b31e0face ("tty: don't crash in tty_init_dev when missing tty_port") Signed-off-by: Matthias Reichl Link: https://lore.kernel.org/r/20201105123432.4448-1-hias@horus.com Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yi Yang Reviewed-by: Xiu Jianfeng Signed-off-by: Laibin Qiu --- drivers/tty/tty_io.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index dc522e994e80..708be6258609 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1512,10 +1512,12 @@ static void release_tty(struct tty_struct *tty, int idx) tty->ops->shutdown(tty); tty_save_termios(tty); tty_driver_remove_tty(tty->driver, tty); - tty->port->itty = NULL; + if (tty->port) + tty->port->itty = NULL; if (tty->link) tty->link->port->itty = NULL; - tty_buffer_cancel_work(tty->port); + if (tty->port) + tty_buffer_cancel_work(tty->port); if (tty->link) tty_buffer_cancel_work(tty->link->port); -- Gitee From 4b4a6296372d5df1afa1f11db56f72c13abfc5cd Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 12 Apr 2022 15:55:21 +0800 Subject: [PATCH 122/340] PM: wakeup: simplify the output logic of pm_show_wakelocks() stable inclusion from linux-4.19.228 commit cf94a8a3ff3d7aa06742f1beb5166fa45ba72122 -------------------------------- commit c9d967b2ce40d71e968eb839f36c936b8a9cf1ea upstream. The buffer handling in pm_show_wakelocks() is tricky, and hopefully correct. Ensure it really is correct by using sysfs_emit_at() which handles all of the tricky string handling logic in a PAGE_SIZE buffer for us automatically as this is a sysfs file being read from. Reviewed-by: Lee Jones Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- kernel/power/wakelock.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/kernel/power/wakelock.c b/kernel/power/wakelock.c index 4210152e56f0..aad7c8fcb22f 100644 --- a/kernel/power/wakelock.c +++ b/kernel/power/wakelock.c @@ -39,23 +39,19 @@ ssize_t pm_show_wakelocks(char *buf, bool show_active) { struct rb_node *node; struct wakelock *wl; - char *str = buf; - char *end = buf + PAGE_SIZE; + int len = 0; mutex_lock(&wakelocks_lock); for (node = rb_first(&wakelocks_tree); node; node = rb_next(node)) { wl = rb_entry(node, struct wakelock, node); if (wl->ws.active == show_active) - str += scnprintf(str, end - str, "%s ", wl->name); + len += sysfs_emit_at(buf, len, "%s ", wl->name); } - if (str > buf) - str--; - - str += scnprintf(str, end - str, "\n"); + len += sysfs_emit_at(buf, len, "\n"); mutex_unlock(&wakelocks_lock); - return (str - buf); + return len; } #if CONFIG_PM_WAKELOCKS_LIMIT > 0 -- Gitee From cead80db71f5bcd0ee035c33837d7e1b750d022e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 12 Apr 2022 15:55:22 +0800 Subject: [PATCH 123/340] netfilter: nft_payload: do not update layer 4 checksum when mangling fragments stable inclusion from linux-4.19.228 commit a622b1fdfe1c725a6e9cf34d04e92f5c36a99606 -------------------------------- commit 4e1860a3863707e8177329c006d10f9e37e097a8 upstream. IP fragments do not come with the transport header, hence skip bogus layer 4 checksum updates. Fixes: 1814096980bb ("netfilter: nft_payload: layer 4 checksum adjustment for pseudoheader fields") Reported-and-tested-by: Steffen Weinreich Signed-off-by: Pablo Neira Ayuso Signed-off-by: Florian Westphal Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/netfilter/nft_payload.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index b1a9f330a51f..fd87216bc0a9 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -194,6 +194,9 @@ static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, struct sk_buff *skb, unsigned int *l4csum_offset) { + if (pkt->xt.fragoff) + return -1; + switch (pkt->tprot) { case IPPROTO_TCP: *l4csum_offset = offsetof(struct tcphdr, check); -- Gitee From 1dd442ba8899cc84d8d882c8367c620796cf6d7f Mon Sep 17 00:00:00 2001 From: Robert Hancock Date: Tue, 12 Apr 2022 15:55:23 +0800 Subject: [PATCH 124/340] serial: 8250: of: Fix mapped region size when using reg-offset property stable inclusion from linux-4.19.228 commit ff6cb8fc12edee1a4cfe5f2915b06a4d6a5f9d1f -------------------------------- commit d06b1cf28297e27127d3da54753a3a01a2fa2f28 upstream. 8250_of supports a reg-offset property which is intended to handle cases where the device registers start at an offset inside the region of memory allocated to the device. The Xilinx 16550 UART, for which this support was initially added, requires this. However, the code did not adjust the overall size of the mapped region accordingly, causing the driver to request an area of memory past the end of the device's allocation. For example, if the UART was allocated an address of 0xb0130000, size of 0x10000 and reg-offset of 0x1000 in the device tree, the region of memory reserved was b0131000-b0140fff, which caused the driver for the region starting at b0140000 to fail to probe. Fix this by subtracting reg-offset from the mapped region size. 
Fixes: b912b5e2cfb3 ([POWERPC] Xilinx: of_serial support for Xilinx uart 16550.) Cc: stable Signed-off-by: Robert Hancock Link: https://lore.kernel.org/r/20220112194214.881844-1-robert.hancock@calian.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/tty/serial/8250/8250_of.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_of.c b/drivers/tty/serial/8250/8250_of.c index 2488de1c4bc4..dcd2105e2b60 100644 --- a/drivers/tty/serial/8250/8250_of.c +++ b/drivers/tty/serial/8250/8250_of.c @@ -104,8 +104,17 @@ static int of_platform_serial_setup(struct platform_device *ofdev, port->mapsize = resource_size(&resource); /* Check for shifted address mapping */ - if (of_property_read_u32(np, "reg-offset", &prop) == 0) + if (of_property_read_u32(np, "reg-offset", &prop) == 0) { + if (prop >= port->mapsize) { + dev_warn(&ofdev->dev, "reg-offset %u exceeds region size %pa\n", + prop, &port->mapsize); + ret = -EINVAL; + goto err_unprepare; + } + port->mapbase += prop; + port->mapsize -= prop; + } port->iotype = UPIO_MEM; if (of_property_read_u32(np, "reg-io-width", &prop) == 0) { -- Gitee From 4fb55bc78cc043e919c58f52489eafdc9acb311b Mon Sep 17 00:00:00 2001 From: Valentin Caron Date: Tue, 12 Apr 2022 15:55:24 +0800 Subject: [PATCH 125/340] serial: stm32: fix software flow control transfer stable inclusion from linux-4.19.228 commit a41398a5a1a0dde794c65ef1fa6b2323e2db68bc -------------------------------- commit 037b91ec7729524107982e36ec4b40f9b174f7a2 upstream. x_char is ignored by stm32_usart_start_tx() when xmit buffer is empty. Fix start_tx condition to allow x_char to be sent. Fixes: 48a6092fb41f ("serial: stm32-usart: Add STM32 USART Driver") Cc: stable Signed-off-by: Erwan Le Ray Signed-off-by: Valentin Caron Link: https://lore.kernel.org/r/20220111164441.6178-3-valentin.caron@foss.st.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/tty/serial/stm32-usart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c index e8d7a7bb4339..f761e1038c34 100644 --- a/drivers/tty/serial/stm32-usart.c +++ b/drivers/tty/serial/stm32-usart.c @@ -506,7 +506,7 @@ static void stm32_start_tx(struct uart_port *port) { struct circ_buf *xmit = &port->state->xmit; - if (uart_circ_empty(xmit)) + if (uart_circ_empty(xmit) && !port->x_char) return; stm32_transmit_chars(port); -- Gitee From 9526d3d74a63d2e1b4805e6b0ada3333910fefe9 Mon Sep 17 00:00:00 2001 From: "daniel.starke@siemens.com" Date: Tue, 12 Apr 2022 15:55:25 +0800 Subject: [PATCH 126/340] tty: n_gsm: fix SW flow control encoding/handling stable inclusion from linux-4.19.228 commit 47bc9be8d6d94a25a598b4056078925dd6d15851 -------------------------------- commit 8838b2af23caf1ff0610caef2795d6668a013b2d upstream. n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.2.7.3 states that DC1 (XON) and DC3 (XOFF) are the control characters defined in ISO/IEC 646. These shall be quoted if seen in the data stream to avoid interpretation as flow control characters. ISO/IEC 646 refers to the set of ISO standards described as the ISO 7-bit coded character set for information interchange. 
Its final version is also known as ITU T.50. See https://www.itu.int/rec/T-REC-T.50-199209-I/en To abide the standard it is needed to quote DC1 and DC3 correctly if these are seen as data bytes and not as control characters. The current implementation already tries to enforce this but fails to catch all defined cases. 3GPP 27.010 chapter 5.2.7.3 clearly states that the most significant bit shall be ignored for DC1 and DC3 handling. The current implementation handles only the case with the most significant bit set 0. Cases in which DC1 and DC3 have the most significant bit set 1 are left unhandled. This patch fixes this by masking the data bytes with ISO_IEC_646_MASK (only the 7 least significant bits set 1) before comparing them with XON (a.k.a. DC1) and XOFF (a.k.a. DC3) when testing which byte values need quotation via byte stuffing. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220120101857.2509-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/tty/n_gsm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 86b7e20ffd7f..7de54c78a7b9 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -313,6 +313,7 @@ static struct tty_driver *gsm_tty_driver; #define GSM1_ESCAPE_BITS 0x20 #define XON 0x11 #define XOFF 0x13 +#define ISO_IEC_646_MASK 0x7F static const struct tty_port_operations gsm_port_ops; @@ -531,7 +532,8 @@ static int gsm_stuff_frame(const u8 *input, u8 *output, int len) int olen = 0; while (len--) { if (*input == GSM1_SOF || *input == GSM1_ESCAPE - || *input == XON || *input == XOFF) { + || (*input & ISO_IEC_646_MASK) == XON + || (*input & ISO_IEC_646_MASK) == XOFF) { *output++ = GSM1_ESCAPE; *output++ = *input++ ^ GSM1_ESCAPE_BITS; olen++; -- Gitee From 2069e835d6c7440764f80db4ba33ad1e88b1bdc8 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 12 Apr 2022 15:55:26 +0800 Subject: [PATCH 127/340] ipv6_tunnel: Rate limit warning messages stable inclusion from linux-4.19.228 commit e3e01ed702c7b5f5d306c2ef566d30b84aa57126 -------------------------------- commit 6cee105e7f2ced596373951d9ea08dacc3883c68 upstream. The warning messages can be invoked from the data path for every packet transmitted through an ip6gre netdev, leading to high CPU utilization. Fix that by rate limiting the messages. Fixes: 09c6bbf090ec ("[IPV6]: Do mandatory IPv6 tunnel endpoint checks in realtime") Reported-by: Maksym Yaremchuk Tested-by: Maksym Yaremchuk Signed-off-by: Ido Schimmel Reviewed-by: Amit Cohen Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv6/ip6_tunnel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 35c127c3eee7..b647a4037679 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1005,14 +1005,14 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr_and_flags(net, laddr, ldev, false, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Local address not yet configured!\n", - p->name); + pr_warn_ratelimited("%s xmit: Local address not yet configured!\n", + p->name); else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr_and_flags(net, raddr, ldev, true, 0, IFA_F_TENTATIVE))) - pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", - p->name); + pr_warn_ratelimited("%s xmit: Routing loop! Remote address found on this node!\n", + p->name); else ret = 1; rcu_read_unlock(); -- Gitee From 711eff8f8f8ef22caf8282b927633d9aebc06ffb Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 12 Apr 2022 15:55:27 +0800 Subject: [PATCH 128/340] ping: fix the sk_bound_dev_if match in ping_lookup stable inclusion from linux-4.19.228 commit a921507836877c4e28084f7bfd35ac2608321268 -------------------------------- commit 2afc3b5a31f9edf3ef0f374f5d70610c79c93a42 upstream. When 'ping' changes to use PING socket instead of RAW socket by: # sysctl -w net.ipv4.ping_group_range="0 100" the selftests 'router_broadcast.sh' will fail, as such command # ip vrf exec vrf-h1 ping -I veth0 198.51.100.255 -b can't receive the response skb by the PING socket. It's caused by mismatch of sk_bound_dev_if and dif in ping_rcv() when looking up the PING socket, as dif is vrf-h1 if dif's master was set to vrf-h1. This patch is to fix this regression by also checking the sk_bound_dev_if against sdif so that the packets can stil be received even if the socket is not bound to the vrf device but to the real iif. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Reported-by: Hangbin Liu Signed-off-by: Xin Long Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv4/ping.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 862744c28548..276442443d32 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -225,7 +225,8 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) continue; } - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != inet_sdif(skb)) continue; sock_hold(sk); -- Gitee From b8ab968eaf1b13f1b7a591aea1df65dff3696123 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:55:28 +0800 Subject: [PATCH 129/340] ipv4: avoid using shared IP generator for connected sockets stable inclusion from linux-4.19.228 commit eb04c6d1ec67e30f3aa5ef82112cbfdbddfd4f65 -------------------------------- commit 23f57406b82de51809d5812afd96f210f8b627f3 upstream. ip_select_ident_segs() has been very conservative about using the connected socket private generator only for packets with IP_DF set, claiming it was needed for some VJ compression implementations. As mentioned in this referenced document, this can be abused. 
(Ref: Off-Path TCP Exploits of the Mixed IPID Assignment) Before switching to pure random IPID generation and possibly hurt some workloads, lets use the private inet socket generator. Not only this will remove one vulnerability, this will also improve performance of TCP flows using pmtudisc==IP_PMTUDISC_DONT Fixes: 73f156a6e8c1 ("inetpeer: get rid of ip_id_count") Signed-off-by: Eric Dumazet Reviewed-by: David Ahern Reported-by: Ray Che Cc: Willy Tarreau Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- include/net/ip.h | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/include/net/ip.h b/include/net/ip.h index 17766586dfa0..621eeb9f142e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -437,19 +437,18 @@ static inline void ip_select_ident_segs(struct net *net, struct sk_buff *skb, { struct iphdr *iph = ip_hdr(skb); + /* We had many attacks based on IPID, use the private + * generator as much as we can. + */ + if (sk && inet_sk(sk)->inet_daddr) { + iph->id = htons(inet_sk(sk)->inet_id); + inet_sk(sk)->inet_id += segs; + return; + } if ((iph->frag_off & htons(IP_DF)) && !skb->ignore_df) { - /* This is only to work around buggy Windows95/2000 - * VJ compression implementations. If the ID field - * does not change, they drop every other packet in - * a TCP stream using header compression. - */ - if (sk && inet_sk(sk)->inet_daddr) { - iph->id = htons(inet_sk(sk)->inet_id); - inet_sk(sk)->inet_id += segs; - } else { - iph->id = 0; - } + iph->id = 0; } else { + /* Unfortunately we need the big hammer to get a suitable IPID */ __ip_select_ident(net, iph, segs); } } -- Gitee From 891412134fbd7389e6aed6b117f62e7db657513a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:55:29 +0800 Subject: [PATCH 130/340] ipv6: annotate accesses to fn->fn_sernum stable inclusion from linux-4.19.228 commit 243e898452adcf6055c9b2632becef7c59f02874 -------------------------------- commit aafc2e3285c2d7a79b7ee15221c19fbeca7b1509 upstream. struct fib6_node's fn_sernum field can be read while other threads change it. Add READ_ONCE()/WRITE_ONCE() annotations. 
Do not change existing smp barriers in fib6_get_cookie_safe() and __fib6_update_sernum_upto_root() syzbot reported: BUG: KCSAN: data-race in fib6_clean_node / inet6_csk_route_socket write to 0xffff88813df62e2c of 4 bytes by task 1920 on cpu 1: fib6_clean_node+0xc2/0x260 net/ipv6/ip6_fib.c:2178 fib6_walk_continue+0x38e/0x430 net/ipv6/ip6_fib.c:2112 fib6_walk net/ipv6/ip6_fib.c:2160 [inline] fib6_clean_tree net/ipv6/ip6_fib.c:2240 [inline] __fib6_clean_all+0x1a9/0x2e0 net/ipv6/ip6_fib.c:2256 fib6_flush_trees+0x6c/0x80 net/ipv6/ip6_fib.c:2281 rt_genid_bump_ipv6 include/net/net_namespace.h:488 [inline] addrconf_dad_completed+0x57f/0x870 net/ipv6/addrconf.c:4230 addrconf_dad_work+0x908/0x1170 process_one_work+0x3f6/0x960 kernel/workqueue.c:2307 worker_thread+0x616/0xa70 kernel/workqueue.c:2454 kthread+0x1bf/0x1e0 kernel/kthread.c:359 ret_from_fork+0x1f/0x30 read to 0xffff88813df62e2c of 4 bytes by task 15701 on cpu 0: fib6_get_cookie_safe include/net/ip6_fib.h:285 [inline] rt6_get_cookie include/net/ip6_fib.h:306 [inline] ip6_dst_store include/net/ip6_route.h:234 [inline] inet6_csk_route_socket+0x352/0x3c0 net/ipv6/inet6_connection_sock.c:109 inet6_csk_xmit+0x91/0x1e0 net/ipv6/inet6_connection_sock.c:121 __tcp_transmit_skb+0x1323/0x1840 net/ipv4/tcp_output.c:1402 tcp_transmit_skb net/ipv4/tcp_output.c:1420 [inline] tcp_write_xmit+0x1450/0x4460 net/ipv4/tcp_output.c:2680 __tcp_push_pending_frames+0x68/0x1c0 net/ipv4/tcp_output.c:2864 tcp_push+0x2d9/0x2f0 net/ipv4/tcp.c:725 mptcp_push_release net/mptcp/protocol.c:1491 [inline] __mptcp_push_pending+0x46c/0x490 net/mptcp/protocol.c:1578 mptcp_sendmsg+0x9ec/0xa50 net/mptcp/protocol.c:1764 inet6_sendmsg+0x5f/0x80 net/ipv6/af_inet6.c:643 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] kernel_sendmsg+0x97/0xd0 net/socket.c:745 sock_no_sendpage+0x84/0xb0 net/core/sock.c:3086 inet_sendpage+0x9d/0xc0 net/ipv4/af_inet.c:834 kernel_sendpage+0x187/0x200 net/socket.c:3492 sock_sendpage+0x5a/0x70 net/socket.c:1007 pipe_to_sendpage+0x128/0x160 fs/splice.c:364 splice_from_pipe_feed fs/splice.c:418 [inline] __splice_from_pipe+0x207/0x500 fs/splice.c:562 splice_from_pipe fs/splice.c:597 [inline] generic_splice_sendpage+0x94/0xd0 fs/splice.c:746 do_splice_from fs/splice.c:767 [inline] direct_splice_actor+0x80/0xa0 fs/splice.c:936 splice_direct_to_actor+0x345/0x650 fs/splice.c:891 do_splice_direct+0x106/0x190 fs/splice.c:979 do_sendfile+0x675/0xc40 fs/read_write.c:1245 __do_sys_sendfile64 fs/read_write.c:1310 [inline] __se_sys_sendfile64 fs/read_write.c:1296 [inline] __x64_sys_sendfile64+0x102/0x140 fs/read_write.c:1296 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x0000026f -> 0x00000271 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 15701 Comm: syz-executor.2 Not tainted 5.16.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 The Fixes tag I chose is probably arbitrary, I do not think we need to backport this patch to older kernels. 
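For readers unfamiliar with the annotations: READ_ONCE()/WRITE_ONCE() force a single, untorn access and mark the race as intentional for KCSAN; they add no ordering beyond the existing smp_wmb()/smp_rmb() pair, which is why those barriers are left alone. A simplified sketch of what the annotations amount to for a plain int (the real kernel macros handle more cases):

	/* simplified, illustration only -- not the kernel's actual definitions */
	#define WRITE_ONCE_SKETCH(x, val)  (*(volatile typeof(x) *)&(x) = (val))
	#define READ_ONCE_SKETCH(x)        (*(const volatile typeof(x) *)&(x))

	/* writer side, e.g. __fib6_update_sernum_upto_root() */
	WRITE_ONCE_SKETCH(fn->fn_sernum, sernum);

	/* reader side, e.g. fib6_get_cookie_safe() */
	int sernum = READ_ONCE_SKETCH(fn->fn_sernum);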
Fixes: c5cff8561d2d ("ipv6: add rcu grace period before freeing fib6_node") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20220120174112.1126644-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- include/net/ip6_fib.h | 2 +- net/ipv6/ip6_fib.c | 23 +++++++++++++---------- net/ipv6/route.c | 2 +- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 62c936230cc8..b4fea9dd589d 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -243,7 +243,7 @@ static inline bool fib6_get_cookie_safe(const struct fib6_info *f6i, fn = rcu_dereference(f6i->fib6_node); if (fn) { - *cookie = fn->fn_sernum; + *cookie = READ_ONCE(fn->fn_sernum); /* pairs with smp_wmb() in fib6_update_sernum_upto_root() */ smp_rmb(); status = true; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index e0e464b72c1f..5ff67cb8b6ac 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -112,7 +112,7 @@ void fib6_update_sernum(struct net *net, struct fib6_info *f6i) fn = rcu_dereference_protected(f6i->fib6_node, lockdep_is_held(&f6i->fib6_table->tb6_lock)); if (fn) - fn->fn_sernum = fib6_new_sernum(net); + WRITE_ONCE(fn->fn_sernum, fib6_new_sernum(net)); } /* @@ -544,12 +544,13 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; - cb->args[5] = w->root->fn_sernum; + cb->args[5] = READ_ONCE(w->root->fn_sernum); } } else { - if (cb->args[5] != w->root->fn_sernum) { + int sernum = READ_ONCE(w->root->fn_sernum); + if (cb->args[5] != sernum) { /* Begin at the root if the tree changed */ - cb->args[5] = w->root->fn_sernum; + cb->args[5] = sernum; w->state = FWS_INIT; w->node = w->root; w->skip = w->count; @@ -1203,7 +1204,7 @@ static void __fib6_update_sernum_upto_root(struct fib6_info *rt, /* paired with smp_rmb() in rt6_get_cookie_safe() */ smp_wmb(); while (fn) { - fn->fn_sernum = sernum; + WRITE_ONCE(fn->fn_sernum, sernum); fn = rcu_dereference_protected(fn->parent, lockdep_is_held(&rt->fib6_table->tb6_lock)); } @@ -1983,8 +1984,8 @@ static int fib6_clean_node(struct fib6_walker *w) }; if (c->sernum != FIB6_NO_SERNUM_CHANGE && - w->node->fn_sernum != c->sernum) - w->node->fn_sernum = c->sernum; + READ_ONCE(w->node->fn_sernum) != c->sernum) + WRITE_ONCE(w->node->fn_sernum, c->sernum); if (!c->func) { WARN_ON_ONCE(c->sernum == FIB6_NO_SERNUM_CHANGE); @@ -2332,7 +2333,7 @@ static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter, iter->w.state = FWS_INIT; iter->w.node = iter->w.root; iter->w.args = iter; - iter->sernum = iter->w.root->fn_sernum; + iter->sernum = READ_ONCE(iter->w.root->fn_sernum); INIT_LIST_HEAD(&iter->w.lh); fib6_walker_link(net, &iter->w); } @@ -2360,8 +2361,10 @@ static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) { - if (iter->sernum != iter->w.root->fn_sernum) { - iter->sernum = iter->w.root->fn_sernum; + int sernum = READ_ONCE(iter->w.root->fn_sernum); + + if (iter->sernum != sernum) { + iter->sernum = sernum; iter->w.state = FWS_INIT; iter->w.node = iter->w.root; WARN_ON(iter->w.skip); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 80dd55c436fa..8efb03cc6f24 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2320,7 +2320,7 @@ static void ip6_link_failure(struct sk_buff *skb) if (from) { fn = 
rcu_dereference(from->fib6_node); if (fn && (rt->rt6i_flags & RTF_DEFAULT)) - fn->fn_sernum = -1; + WRITE_ONCE(fn->fn_sernum, -1); } } rcu_read_unlock(); -- Gitee From ff03d08f44db106a7a4cea906cda6029eeaf479c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Apr 2022 15:55:30 +0800 Subject: [PATCH 131/340] NFS: Ensure the server has an up to date ctime before hardlinking stable inclusion from linux-4.19.228 commit ddeea0002d17984d66bb63f9b66b96fd604756ad -------------------------------- [ Upstream commit 204975036b34f55237bc44c8a302a88468ef21b5 ] Creating a hard link is required by POSIX to update the file ctime, so ensure that the file data is synced to disk so that we don't clobber the updated ctime by writing back after creating the hard link. Fixes: 9f7682728728 ("NFS: Move the delegation return down into nfs4_proc_link()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/nfs/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 67ece39c7800..ede35472280d 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2052,6 +2052,8 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) trace_nfs_link_enter(inode, dir, dentry); d_drop(dentry); + if (S_ISREG(inode->i_mode)) + nfs_sync_inode(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { ihold(inode); -- Gitee From a2e1b59ecb07811efb3b07a6ec50723b8e05acea Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Apr 2022 15:55:31 +0800 Subject: [PATCH 132/340] NFS: Ensure the server has an up to date ctime before renaming stable inclusion from linux-4.19.228 commit 8b5c9de150c62f686ff274039b97fe3a2297ada6 -------------------------------- [ Upstream commit 6ff9d99bb88faebf134ca668842349d9718e5464 ] Renaming a file is required by POSIX to update the file ctime, so ensure that the file data is synced to disk so that we don't clobber the updated ctime by writing back after creating the hard link. Fixes: f2c2c552f119 ("NFS: Move delegation recall into the NFSv4 callback for rename_setup()") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/nfs/dir.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index ede35472280d..72e5ae8cc4ff 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -2142,6 +2142,8 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, } } + if (S_ISREG(old_inode->i_mode)) + nfs_sync_inode(old_inode); task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, NULL); if (IS_ERR(task)) { error = PTR_ERR(task); -- Gitee From 4540e9afdf2d1b7f3463d3a8ac7c92a95926130a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Beh=C3=BAn?= Date: Tue, 12 Apr 2022 15:55:32 +0800 Subject: [PATCH 133/340] phylib: fix potential use-after-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from linux-4.19.228 commit 67d271760b037ce0806d687ee6057edc8afd4205 -------------------------------- [ Upstream commit cbda1b16687580d5beee38273f6241ae3725960c ] Commit bafbdd527d56 ("phylib: Add device reset GPIO support") added call to phy_device_reset(phydev) after the put_device() call in phy_detach(). The comment before the put_device() call says that the phydev might go away with put_device(). 
Fix potential use-after-free by calling phy_device_reset() before put_device(). Fixes: bafbdd527d56 ("phylib: Add device reset GPIO support") Signed-off-by: Marek Behún Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20220119162748.32418-1-kabel@kernel.org Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/net/phy/phy_device.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 1117355313bc..2de2cbed7c5f 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -1154,6 +1154,9 @@ void phy_detach(struct phy_device *phydev) phydev->mdio.dev.driver == &genphy_driver.mdiodrv.driver) device_release_driver(&phydev->mdio.dev); + /* Assert the reset signal */ + phy_device_reset(phydev, 1); + /* * The phydev might go away on the put_device() below, so avoid * a use-after-free bug by reading the underlying bus first. @@ -1163,9 +1166,6 @@ void phy_detach(struct phy_device *phydev) put_device(&phydev->mdio.dev); if (ndev_owner != bus->owner) module_put(bus->owner); - - /* Assert the reset signal */ - phy_device_reset(phydev, 1); } EXPORT_SYMBOL(phy_detach); -- Gitee From 7282d57e98b3bbceba7aa3a9eaf9c2e3a2d6c5b7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:55:33 +0800 Subject: [PATCH 134/340] ipv4: raw: lock the socket in raw_bind() stable inclusion from linux-4.19.228 commit 3edc526019751c551a05b3ef465d07678fd239de -------------------------------- [ Upstream commit 153a0d187e767c68733b8e9f46218eb1f41ab902 ] For some reason, raw_bind() forgot to lock the socket. BUG: KCSAN: data-race in __ip4_datagram_connect / raw_bind write to 0xffff8881170d4308 of 4 bytes by task 5466 on cpu 0: raw_bind+0x1b0/0x250 net/ipv4/raw.c:739 inet_bind+0x56/0xa0 net/ipv4/af_inet.c:443 __sys_bind+0x14b/0x1b0 net/socket.c:1697 __do_sys_bind net/socket.c:1708 [inline] __se_sys_bind net/socket.c:1706 [inline] __x64_sys_bind+0x3d/0x50 net/socket.c:1706 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff8881170d4308 of 4 bytes by task 5468 on cpu 1: __ip4_datagram_connect+0xb7/0x7b0 net/ipv4/datagram.c:39 ip4_datagram_connect+0x2a/0x40 net/ipv4/datagram.c:89 inet_dgram_connect+0x107/0x190 net/ipv4/af_inet.c:576 __sys_connect_file net/socket.c:1900 [inline] __sys_connect+0x197/0x1b0 net/socket.c:1917 __do_sys_connect net/socket.c:1927 [inline] __se_sys_connect net/socket.c:1924 [inline] __x64_sys_connect+0x3d/0x50 net/socket.c:1924 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00000000 -> 0x0003007f Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 5468 Comm: syz-executor.5 Not tainted 5.17.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv4/raw.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bc3da7796e8a..a24f94abe14f 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -724,6 +724,7 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) int ret = -EINVAL; int chk_addr_ret; + lock_sock(sk); if (sk->sk_state != TCP_CLOSE || addr_len < sizeof(struct sockaddr_in)) goto out; @@ -743,7 +744,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ sk_dst_reset(sk); ret = 0; -out: return ret; +out: + release_sock(sk); + return ret; } /* -- Gitee From a816d3f78a6bda458969bf09ec173882591dbc84 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:55:34 +0800 Subject: [PATCH 135/340] ipv4: tcp: send zero IPID in SYNACK messages stable inclusion from linux-4.19.228 commit 08ed3cabc07e9f6035466e33fbdfad1978e05e06 -------------------------------- [ Upstream commit 970a5a3ea86da637471d3cd04d513a0755aba4bf ] In commit 431280eebed9 ("ipv4: tcp: send zero IPID for RST and ACK sent in SYN-RECV and TIME-WAIT state") we took care of some ctl packets sent by TCP. It turns out we need to use a similar strategy for SYNACK packets. By default, they carry IP_DF and IPID==0, but there are ways to ask them to use the hashed IP ident generator and thus be used to build off-path attacks. (Ref: Off-Path TCP Exploits of the Mixed IPID Assignment) One of this way is to force (before listener is started) echo 1 >/proc/sys/net/ipv4/ip_no_pmtu_disc Another way is using forged ICMP ICMP_FRAG_NEEDED with a very small MTU (like 68) to force a false return from ip_dont_fragment() In this patch, ip_build_and_send_pkt() uses the following heuristics. 1) Most SYNACK packets are smaller than IPV4_MIN_MTU and therefore can use IP_DF regardless of the listener or route pmtu setting. 2) In case the SYNACK packet is bigger than IPV4_MIN_MTU, we use prandom_u32() generator instead of the IPv4 hashed ident one. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: Ray Che Reviewed-by: David Ahern Cc: Geoff Alexander Cc: Willy Tarreau Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv4/ip_output.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e63905f7f6f9..1a6c05908584 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -160,12 +160,19 @@ int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk, iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr); iph->saddr = saddr; iph->protocol = sk->sk_protocol; - if (ip_dont_fragment(sk, &rt->dst)) { + /* Do not bother generating IPID for small packets (eg SYNACK) */ + if (skb->len <= IPV4_MIN_MTU || ip_dont_fragment(sk, &rt->dst)) { iph->frag_off = htons(IP_DF); iph->id = 0; } else { iph->frag_off = 0; - __ip_select_ident(net, iph, 1); + /* TCP packets here are SYNACK with fat IPv4/TCP options. + * Avoid using the hashed IP ident generator. 
+ */ + if (sk->sk_protocol == IPPROTO_TCP) + iph->id = (__force __be16)prandom_u32(); + else + __ip_select_ident(net, iph, 1); } if (opt && opt->opt.optlen) { -- Gitee From 7865710a1bc0a52cbed5636c530cba97902f3847 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:55:35 +0800 Subject: [PATCH 136/340] rtnetlink: make sure to refresh master_dev/m_ops in __rtnl_newlink() stable inclusion from linux-4.19.228 commit a01e60a1ec6bef9be471fb7182a33c6d6f124e93 -------------------------------- commit c6f6f2444bdbe0079e41914a35081530d0409963 upstream. While looking at one unrelated syzbot bug, I found the replay logic in __rtnl_newlink() to potentially trigger use-after-free. It is better to clear master_dev and m_ops inside the loop, in case we have to replay it. Fixes: ba7d49b1f0f8 ("rtnetlink: provide api for getting and setting slave info") Signed-off-by: Eric Dumazet Cc: Jiri Pirko Link: https://lore.kernel.org/r/20220201012106.216495-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/core/rtnetlink.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 83de32e34bb5..6cd0dbe42baa 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2936,9 +2936,9 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, { struct net *net = sock_net(skb->sk); const struct rtnl_link_ops *ops; - const struct rtnl_link_ops *m_ops = NULL; + const struct rtnl_link_ops *m_ops; struct net_device *dev; - struct net_device *master_dev = NULL; + struct net_device *master_dev; struct ifinfomsg *ifm; char kind[MODULE_NAME_LEN]; char ifname[IFNAMSIZ]; @@ -2973,6 +2973,8 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = NULL; } + master_dev = NULL; + m_ops = NULL; if (dev) { master_dev = netdev_master_upper_dev_get(dev); if (master_dev) -- Gitee From b1e20c74a3f035c6a93f2fd9e179a1af6c4df6af Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:55:36 +0800 Subject: [PATCH 137/340] af_packet: fix data-race in packet_setsockopt / packet_setsockopt stable inclusion from linux-4.19.228 commit 1f0c712832907baef27d13721b3ca2e515d39ecf -------------------------------- commit e42e70ad6ae2ae511a6143d2e8da929366e58bd9 upstream. When packet_setsockopt( PACKET_FANOUT_DATA ) reads po->fanout, no lock is held, meaning that another thread can change po->fanout. Given that po->fanout can only be set once during the socket lifetime (it is only cleared from fanout_release()), we can use READ_ONCE()/WRITE_ONCE() to document the race. 
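The shape of that documentation, sketched with invented names rather than the af_packet structures:

  #include <linux/compiler.h>
  #include <linux/types.h>

  struct fanout_state;                    /* opaque for the sketch */

  struct pkt_sock {
          struct fanout_state *fanout;    /* set at most once, cleared only
                                           * when the socket is released */
  };

  /* Publisher: runs under the protocol's own locking. */
  static void pkt_attach_fanout(struct pkt_sock *po, struct fanout_state *f)
  {
          /* Paired with the READ_ONCE() in pkt_has_fanout(). */
          WRITE_ONCE(po->fanout, f);
  }

  /* Lockless test: another thread may be publishing concurrently. */
  static bool pkt_has_fanout(const struct pkt_sock *po)
  {
          return READ_ONCE(po->fanout) != NULL;
  }

Because the pointer is written at most once during the socket's lifetime, annotating the accesses is sufficient; no additional locking is needed on the read side.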
BUG: KCSAN: data-race in packet_setsockopt / packet_setsockopt write to 0xffff88813ae8e300 of 8 bytes by task 14653 on cpu 0: fanout_add net/packet/af_packet.c:1791 [inline] packet_setsockopt+0x22fe/0x24a0 net/packet/af_packet.c:3931 __sys_setsockopt+0x209/0x2a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff88813ae8e300 of 8 bytes by task 14654 on cpu 1: packet_setsockopt+0x691/0x24a0 net/packet/af_packet.c:3935 __sys_setsockopt+0x209/0x2a0 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0x62/0x70 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x0000000000000000 -> 0xffff888106f8c000 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 14654 Comm: syz-executor.3 Not tainted 5.16.0-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 47dceb8ecdc1 ("packet: add classic BPF fanout mode") Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Reported-by: syzbot Link: https://lore.kernel.org/r/20220201022358.330621-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/packet/af_packet.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fbf7d5ef83c7..bacb1bbe53e2 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1729,7 +1729,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) err = -ENOSPC; if (refcount_read(&match->sk_ref) < PACKET_FANOUT_MAX) { __dev_remove_pack(&po->prot_hook); - po->fanout = match; + + /* Paired with packet_setsockopt(PACKET_FANOUT_DATA) */ + WRITE_ONCE(po->fanout, match); + po->rollover = rollover; rollover = NULL; refcount_set(&match->sk_ref, refcount_read(&match->sk_ref) + 1); @@ -3871,7 +3874,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv } case PACKET_FANOUT_DATA: { - if (!po->fanout) + /* Paired with the WRITE_ONCE() in fanout_add() */ + if (!READ_ONCE(po->fanout)) return -EINVAL; return fanout_set_data(po, optval, optlen); -- Gitee From f4af292adec47119344da5c87127724ecc7191d5 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 12 Apr 2022 15:55:37 +0800 Subject: [PATCH 138/340] block: bio-integrity: Advance seed correctly for larger interval sizes stable inclusion from linux-4.19.228 commit d21b6bbc78442bf490e7215ed620ec0878856a0e -------------------------------- commit b13e0c71856817fca67159b11abac350e41289f5 upstream. Commit 309a62fa3a9e ("bio-integrity: bio_integrity_advance must update integrity seed") added code to update the integrity seed value when advancing a bio. However, it failed to take into account that the integrity interval might be larger than the 512-byte block layer sector size. This broke bio splitting on PI devices with 4KB logical blocks. The seed value should be advanced by bio_integrity_intervals() and not the number of sectors. 
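A worked example of the arithmetic, assuming a device whose integrity interval is 4096 bytes (interval_exp = 12); the numbers are illustrative only:

  unsigned int bytes_done   = 8192;                           /* data advanced        */
  unsigned int sectors      = bytes_done >> 9;                /* 16 x 512-byte sectors */
  unsigned int interval_exp = 12;                             /* 4096-byte PI interval */
  unsigned int intervals    = sectors >> (interval_exp - 9);  /* 2 intervals           */

The seed (the expected reference tag) must move by the 2 intervals that were actually consumed, not by the 16 sectors; advancing it by sectors leaves it pointing past the protection information that still covers the remaining data, which is what broke splitting on 4KB-block PI devices.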
Cc: Dmitry Monakhov Cc: stable@vger.kernel.org Fixes: 309a62fa3a9e ("bio-integrity: bio_integrity_advance must update integrity seed") Tested-by: Dmitry Ivanov Reported-by: Alexey Lyashkov Signed-off-by: Martin K. Petersen Link: https://lore.kernel.org/r/20220204034209.4193-1-martin.petersen@oracle.com Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- block/bio-integrity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 0b96220d0efd..2e22a3f7466a 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -399,7 +399,7 @@ void bio_integrity_advance(struct bio *bio, unsigned int bytes_done) struct blk_integrity *bi = blk_get_integrity(bio->bi_disk); unsigned bytes = bio_integrity_bytes(bi, bytes_done >> 9); - bip->bip_iter.bi_sector += bytes_done >> 9; + bip->bip_iter.bi_sector += bio_integrity_intervals(bi, bytes_done >> 9); bvec_iter_advance(bip->bip_vec, &bip->bip_iter, bytes); } EXPORT_SYMBOL(bio_integrity_advance); -- Gitee From 75624b5bc0485778aef9f46ebbf5e5df3c0d054c Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Tue, 12 Apr 2022 15:55:38 +0800 Subject: [PATCH 139/340] iommu/vt-d: Fix potential memory leak in intel_setup_irq_remapping() stable inclusion from linux-4.19.228 commit a31cb1f0fb6caf46ffe88c41252b6b7a4ee062d9 -------------------------------- commit 99e675d473eb8cf2deac1376a0f840222fc1adcf upstream. After commit e3beca48a45b ("irqdomain/treewide: Keep firmware node unconditionally allocated"). For tear down scenario, fn is only freed after fail to allocate ir_domain, though it also should be freed in case dmar_enable_qi returns error. Besides free fn, irq_domain and ir_msi_domain need to be removed as well if intel_setup_irq_remapping fails to enable queued invalidation. Improve the rewinding path by add out_free_ir_domain and out_free_fwnode lables per Baolu's suggestion. 
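The rewinding pattern in general form; the context and helper names here are placeholders, not the driver's functions:

  struct ctx;
  int  alloc_a(struct ctx *c);   void free_a(struct ctx *c);
  int  alloc_b(struct ctx *c);   void free_b(struct ctx *c);
  int  enable_hw(struct ctx *c);

  static int setup_example(struct ctx *c)
  {
          int ret;

          ret = alloc_a(c);
          if (ret)
                  return ret;

          ret = alloc_b(c);
          if (ret)
                  goto out_free_a;

          ret = enable_hw(c);
          if (ret)
                  goto out_free_b;

          return 0;

  out_free_b:
          free_b(c);              /* undo in reverse order of setup */
  out_free_a:
          free_a(c);
          return ret;
  }

Each label releases exactly what had been set up before the failing step, in reverse order; the added out_free_ir_domain and out_free_fwnode labels restore that property for the queued-invalidation failure path.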
Fixes: e3beca48a45b ("irqdomain/treewide: Keep firmware node unconditionally allocated") Suggested-by: Lu Baolu Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20220119063640.16864-1-guoqing.jiang@linux.dev Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220128031002.2219155-3-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/iommu/intel_irq_remapping.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 967450bd421a..276005451914 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -539,7 +539,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) irq_domain_free_fwnode(fn); if (!iommu->ir_domain) { pr_err("IR%d: failed to allocate irqdomain\n", iommu->seq_id); - goto out_free_bitmap; + goto out_free_fwnode; } iommu->ir_msi_domain = arch_create_remap_msi_irq_domain(iommu->ir_domain, @@ -563,7 +563,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (dmar_enable_qi(iommu)) { pr_err("Failed to enable queued invalidation\n"); - goto out_free_bitmap; + goto out_free_ir_domain; } } @@ -587,6 +587,14 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) return 0; +out_free_ir_domain: + if (iommu->ir_msi_domain) + irq_domain_remove(iommu->ir_msi_domain); + iommu->ir_msi_domain = NULL; + irq_domain_remove(iommu->ir_domain); + iommu->ir_domain = NULL; +out_free_fwnode: + irq_domain_free_fwnode(fn); out_free_bitmap: kfree(bitmap); out_free_pages: -- Gitee From 0b3f58fe48e9825546b647b5faf58a3701427a5e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 12 Apr 2022 15:55:39 +0800 Subject: [PATCH 140/340] iommu/amd: Fix loop timeout issue in iommu_ga_log_enable() stable inclusion from linux-4.19.228 commit bd2a6021b8778bf671a2f9739241543398a7c853 -------------------------------- commit 9b45a7738eec52bf0f5d8d3d54e822962781c5f2 upstream. The polling loop for the register change in iommu_ga_log_enable() needs to have a udelay() in it. Otherwise the CPU might be faster than the IOMMU hardware and wrongly trigger the WARN_ON() further down the code stream. Use a 10us for udelay(), has there is some hardware where activation of the GA log can take more than a 100ms. A future optimization should move the activation check of the GA log to the point where it gets used for the first time. But that is a bigger change and not suitable for a fix. 
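A minimal sketch of a bounded register poll with a delay between reads; the register, mask and iteration budget below are placeholders, not the AMD IOMMU definitions:

  #include <linux/delay.h>
  #include <linux/errno.h>
  #include <linux/io.h>
  #include <linux/types.h>

  #define POLL_ITERATIONS 100000          /* placeholder budget */

  /* Returns 0 once 'mask' is observed in the status register,
   * -ETIMEDOUT otherwise. The udelay() keeps a fast CPU from burning
   * through the whole budget before slow hardware can set the bit.
   */
  static int wait_for_status(void __iomem *status_reg, u32 mask)
  {
          int i;

          for (i = 0; i < POLL_ITERATIONS; i++) {
                  if (readl(status_reg) & mask)
                          return 0;
                  udelay(10);
          }
          return -ETIMEDOUT;
  }

Without the delay the loop is bounded by CPU speed rather than by elapsed time, which is how the spurious WARN_ON() described above was triggered.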
Fixes: 8bda0cfbdc1a ("iommu/amd: Detect and initialize guest vAPIC log") Signed-off-by: Joerg Roedel Link: https://lore.kernel.org/r/20220204115537.3894-1-joro@8bytes.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/iommu/amd_iommu_init.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 1e9a5da562f0..29bf91e50d41 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -769,6 +770,7 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & (MMIO_STATUS_GALOG_RUN_MASK)) break; + udelay(10); } if (i >= LOOP_TIMEOUT) -- Gitee From 5a1955f471aa77d43afc8bf50ca84dfe1f03b7d1 Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Tue, 12 Apr 2022 15:55:40 +0800 Subject: [PATCH 141/340] ext4: fix error handling in ext4_restore_inline_data() stable inclusion from linux-4.19.228 commit e9368c941a26098a199ee357f0370d49ac30b47f -------------------------------- commit 897026aaa73eb2517dfea8d147f20ddb0b813044 upstream. While running "./check -I 200 generic/475" it sometimes gives below kernel BUG(). Ideally we should not call ext4_write_inline_data() if ext4_create_inline_data() has failed. [73131.453234] kernel BUG at fs/ext4/inline.c:223! 212 static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, 213 void *buffer, loff_t pos, unsigned int len) 214 { <...> 223 BUG_ON(!EXT4_I(inode)->i_inline_off); 224 BUG_ON(pos + len > EXT4_I(inode)->i_inline_size); This patch handles the error and prints out a emergency msg saying potential data loss for the given inode (since we couldn't restore the original inline_data due to some previous error). [ 9571.070313] EXT4-fs (dm-0): error restoring inline_data for inode -- potential data loss! (inode 1703982, error -30) Reported-by: Eric Whitney Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/9f4cd7dfd54fa58ff27270881823d94ddf78dd07.1642416995.git.riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/ext4/inline.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index f36936bcd260..abb71ba0b1e6 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1126,7 +1126,15 @@ static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, struct ext4_iloc *iloc, void *buf, int inline_size) { - ext4_create_inline_data(handle, inode, inline_size); + int ret; + + ret = ext4_create_inline_data(handle, inode, inline_size); + if (ret) { + ext4_msg(inode->i_sb, KERN_EMERG, + "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)", + inode->i_ino, ret); + return; + } ext4_write_inline_data(inode, iloc, buf, 0, inline_size); ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); } -- Gitee From d3178c7c22d632371f3c18710f414100eb5e7f6d Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Tue, 12 Apr 2022 15:55:41 +0800 Subject: [PATCH 142/340] integrity: check the return value of audit_log_start() stable inclusion from linux-4.19.230 commit e9fa71bab4de31155eebb3074e29f49d6748fd1a -------------------------------- commit 83230351c523b04ff8a029a4bdf97d881ecb96fc upstream. 
audit_log_start() returns audit_buffer pointer on success or NULL on error, so it is better to check the return value of it. Fixes: 3323eec921ef ("integrity: IMA as an integrity service provider") Signed-off-by: Xiaoke Wang Cc: Reviewed-by: Paul Moore Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- security/integrity/integrity_audit.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/security/integrity/integrity_audit.c b/security/integrity/integrity_audit.c index 82c98f7d217e..d03fbdfc972e 100644 --- a/security/integrity/integrity_audit.c +++ b/security/integrity/integrity_audit.c @@ -39,6 +39,8 @@ void integrity_audit_msg(int audit_msgno, struct inode *inode, return; ab = audit_log_start(audit_context(), GFP_KERNEL, audit_msgno); + if (!ab) + return; audit_log_format(ab, "pid=%d uid=%u auid=%u ses=%u", task_pid_nr(current), from_kuid(&init_user_ns, current_cred()->uid), -- Gitee From a1bb26418f5b62f17a40d6c1a1af93a88c547450 Mon Sep 17 00:00:00 2001 From: Stefan Berger Date: Tue, 12 Apr 2022 15:55:42 +0800 Subject: [PATCH 143/340] ima: Remove ima_policy file before directory stable inclusion from linux-4.19.230 commit 2c3beddb39ebe4411497629442f594133f29e0cd -------------------------------- commit f7333b9572d0559e00352a926c92f29f061b4569 upstream. The removal of ima_dir currently fails since ima_policy still exists, so remove the ima_policy file before removing the directory. Fixes: 4af4662fa4a9 ("integrity: IMA policy") Signed-off-by: Stefan Berger Cc: Acked-by: Christian Brauner Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- security/integrity/ima/ima_fs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/integrity/ima/ima_fs.c b/security/integrity/ima/ima_fs.c index 604cdac63d84..38bd565b9da9 100644 --- a/security/integrity/ima/ima_fs.c +++ b/security/integrity/ima/ima_fs.c @@ -497,12 +497,12 @@ int __init ima_fs_init(void) return 0; out: + securityfs_remove(ima_policy); securityfs_remove(violations); securityfs_remove(runtime_measurements_count); securityfs_remove(ascii_runtime_measurements); securityfs_remove(binary_runtime_measurements); securityfs_remove(ima_symlink); securityfs_remove(ima_dir); - securityfs_remove(ima_policy); return -1; } -- Gitee From e694730f5007fce98c53d1eb0847d5c65972dc27 Mon Sep 17 00:00:00 2001 From: Roberto Sassu Date: Tue, 12 Apr 2022 15:55:43 +0800 Subject: [PATCH 144/340] ima: Allow template selection with ima_template[_fmt]= after ima_hash= stable inclusion from linux-4.19.230 commit 4d9eb5b2ef21c496597f09355d9d6f5508731e98 -------------------------------- commit bb8e52e4906f148c2faf6656b5106cf7233e9301 upstream. Commit c2426d2ad5027 ("ima: added support for new kernel cmdline parameter ima_template_fmt") introduced an additional check on the ima_template variable to avoid multiple template selection. Unfortunately, ima_template could be also set by the setup function of the ima_hash= parameter, when it calls ima_template_desc_current(). This causes attempts to choose a new template with ima_template= or with ima_template_fmt=, after ima_hash=, to be ignored. Achieve the goal of the commit mentioned with the new static variable template_setup_done, so that template selection requests after ima_hash= are not ignored. Finally, call ima_init_template_list(), if not already done, to initialize the list of templates before lookup_template_desc() is called. 
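The underlying pattern, reduced to its essentials with made-up parameter names rather than the IMA ones:

  #include <linux/init.h>

  static char *chosen_name;
  static int   choice_setup_done;

  /* Explicit selection: record it and lock out later selections. */
  static int __init example_choice_setup(char *str)
  {
          if (choice_setup_done)
                  return 1;
          chosen_name = str;
          choice_setup_done = 1;
          return 1;
  }
  __setup("example_choice=", example_choice_setup);

  /* A parameter that only derives a default must not set the flag,
   * otherwise it silently vetoes a later explicit example_choice=.
   */
  static int __init example_fallback_setup(char *str)
  {
          if (!chosen_name)
                  chosen_name = "builtin-default";
          return 1;
  }
  __setup("example_fallback=", example_fallback_setup);

Keying the guard on an explicit 'done' flag instead of on the side effect (the selection already being non-NULL) is what lets ima_template= and ima_template_fmt= still take effect after ima_hash=.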
Reported-by: Guo Zihua Signed-off-by: Roberto Sassu Cc: stable@vger.kernel.org Fixes: c2426d2ad5027 ("ima: added support for new kernel cmdline parameter ima_template_fmt") Signed-off-by: Mimi Zohar Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- security/integrity/ima/ima_template.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/security/integrity/ima/ima_template.c b/security/integrity/ima/ima_template.c index 30db39b23804..4dfdccce497b 100644 --- a/security/integrity/ima/ima_template.c +++ b/security/integrity/ima/ima_template.c @@ -31,6 +31,7 @@ static struct ima_template_desc builtin_templates[] = { static LIST_HEAD(defined_templates); static DEFINE_SPINLOCK(template_list); +static int template_setup_done; static struct ima_template_field supported_fields[] = { {.field_id = "d", .field_init = ima_eventdigest_init, @@ -57,10 +58,11 @@ static int __init ima_template_setup(char *str) struct ima_template_desc *template_desc; int template_len = strlen(str); - if (ima_template) + if (template_setup_done) return 1; - ima_init_template_list(); + if (!ima_template) + ima_init_template_list(); /* * Verify that a template with the supplied name exists. @@ -84,6 +86,7 @@ static int __init ima_template_setup(char *str) } ima_template = template_desc; + template_setup_done = 1; return 1; } __setup("ima_template=", ima_template_setup); @@ -92,7 +95,7 @@ static int __init ima_template_fmt_setup(char *str) { int num_templates = ARRAY_SIZE(builtin_templates); - if (ima_template) + if (template_setup_done) return 1; if (template_desc_init_fields(str, NULL, NULL) < 0) { @@ -103,6 +106,7 @@ static int __init ima_template_fmt_setup(char *str) builtin_templates[num_templates - 1].fmt = str; ima_template = builtin_templates + num_templates - 1; + template_setup_done = 1; return 1; } -- Gitee From f22022eb03eb7bdc3a2dc638a0017e45e5a21dd5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Apr 2022 15:55:44 +0800 Subject: [PATCH 145/340] NFS: Fix initialisation of nfs_client cl_flags field stable inclusion from linux-4.19.230 commit 57e13bdd9634a48c0bdbf988858144350321c0bf -------------------------------- commit 468d126dab45718feeb728319be20bd869a5eaa7 upstream. For some long forgotten reason, the nfs_client cl_flags field is initialised in nfs_get_client() instead of being initialised at allocation time. This quirk was harmless until we moved the call to nfs_create_rpc_client(). 
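In general form, the fix is to copy configuration at allocation time, before any other setup step (or any other thread) can consult the object; a sketch with invented names, not the NFS client code:

  #include <linux/slab.h>

  struct client {
          unsigned long flags;
          /* ... other state ... */
  };

  struct client_initdata {
          unsigned long init_flags;
  };

  static struct client *client_alloc(const struct client_initdata *init)
  {
          struct client *clp = kzalloc(sizeof(*clp), GFP_KERNEL);

          if (!clp)
                  return NULL;

          /* Configuration is visible from the first use of the object;
           * deferring this to a later setup step reopens the window the
           * patch closes.
           */
          clp->flags = init->init_flags;
          return clp;
  }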
Fixes: dd99e9f98fbf ("NFSv4: Initialise connection to the server in nfs4_alloc_client()") Cc: stable@vger.kernel.org # 4.8.x Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/nfs/client.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b8adf3467786..7d02dc52209d 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -180,6 +180,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init) INIT_LIST_HEAD(&clp->cl_superblocks); clp->cl_rpcclient = ERR_PTR(-EINVAL); + clp->cl_flags = cl_init->init_flags; clp->cl_proto = cl_init->proto; clp->cl_net = get_net(cl_init->net); @@ -427,7 +428,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) list_add_tail(&new->cl_share_link, &nn->nfs_client_list); spin_unlock(&nn->nfs_client_lock); - new->cl_flags = cl_init->init_flags; return rpc_ops->init_client(new, cl_init); } -- Gitee From 9e9b47e9d07a0043f5935a2c8d6e7a062c05445d Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 12 Apr 2022 15:55:45 +0800 Subject: [PATCH 146/340] NFSv4 only print the label when its queried stable inclusion from linux-4.19.230 commit 1789f59f1779c99d0756b40036c62a7519f53543 -------------------------------- [ Upstream commit 2c52c8376db7160a1dd8a681c61c9258405ef143 ] When the bitmask of the attributes doesn't include the security label, don't bother printing it. Since the label might not be null terminated, adjust the printing format accordingly. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/nfs/nfs4xdr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index feebe80f2092..982fac53e0c6 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4300,10 +4300,11 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap, } else printk(KERN_WARNING "%s: label too long (%u)!\n", __func__, len); + if (label && label->label) + dprintk("%s: label=%.*s, len=%d, PI=%d, LFS=%d\n", + __func__, label->len, (char *)label->label, + label->len, label->pi, label->lfs); } - if (label && label->label) - dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__, - (char *)label->label, label->len, label->pi, label->lfs); return status; out_overflow: -- Gitee From 008ae6aed08efa8a03d777b50e74189f79975f77 Mon Sep 17 00:00:00 2001 From: Xiaoke Wang Date: Tue, 12 Apr 2022 15:55:46 +0800 Subject: [PATCH 147/340] nfs: nfs4clinet: check the return value of kstrdup() stable inclusion from linux-4.19.230 commit 2c9587f72ff4b502c2fd15eb3ccff388eae12d07 -------------------------------- [ Upstream commit fbd2057e5329d3502a27491190237b6be52a1cb6 ] kstrdup() returns NULL when some internal memory errors happen, it is better to check the return value of it so to catch the memory error in time. 
Signed-off-by: Xiaoke Wang Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/nfs/nfs4client.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 36282e48cff6..60999b261666 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1276,8 +1276,11 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, } nfs_put_client(clp); - if (server->nfs_client->cl_hostname == NULL) + if (server->nfs_client->cl_hostname == NULL) { server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); + if (server->nfs_client->cl_hostname == NULL) + return -ENOMEM; + } nfs_server_insert_lists(server); return nfs_probe_destination(server); -- Gitee From 4cb6ed69b775dfec22ea1f071817ca32be43a67f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Apr 2022 15:55:47 +0800 Subject: [PATCH 148/340] NFSv4.1: Fix uninitialised variable in devicenotify stable inclusion from linux-4.19.230 commit e6b0f9177c43ff9ad0f1a6dd3639c383373911b7 -------------------------------- [ Upstream commit b05bf5c63b326ce1da84ef42498d8e0e292e694c ] When decode_devicenotify_args() exits with no entries, we need to ensure that the struct cb_devicenotifyargs is initialised to { 0, NULL } in order to avoid problems in nfs4_callback_devicenotify(). Reported-by: Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/nfs/callback.h | 2 +- fs/nfs/callback_proc.c | 2 +- fs/nfs/callback_xdr.c | 18 +++++++++--------- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h index 8f34daf85f70..5d5227ce4d91 100644 --- a/fs/nfs/callback.h +++ b/fs/nfs/callback.h @@ -168,7 +168,7 @@ struct cb_devicenotifyitem { }; struct cb_devicenotifyargs { - int ndevs; + uint32_t ndevs; struct cb_devicenotifyitem *devs; }; diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index bcc51f131a49..868d66ed8bcf 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -364,7 +364,7 @@ __be32 nfs4_callback_devicenotify(void *argp, void *resp, struct cb_process_state *cps) { struct cb_devicenotifyargs *args = argp; - int i; + uint32_t i; __be32 res = 0; struct nfs_client *clp = cps->clp; struct nfs_server *server = NULL; diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 57558a8d92e9..76aa1b456c52 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -268,11 +268,9 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, void *argp) { struct cb_devicenotifyargs *args = argp; + uint32_t tmp, n, i; __be32 *p; __be32 status = 0; - u32 tmp; - int n, i; - args->ndevs = 0; /* Num of device notifications */ p = read_buf(xdr, sizeof(uint32_t)); @@ -281,7 +279,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, goto out; } n = ntohl(*p++); - if (n <= 0) + if (n == 0) goto out; if (n > ULONG_MAX / sizeof(*args->devs)) { status = htonl(NFS4ERR_BADXDR); @@ -339,19 +337,21 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp, dev->cbd_immediate = 0; } - args->ndevs++; - dprintk("%s: type %d layout 0x%x immediate %d\n", __func__, dev->cbd_notify_type, dev->cbd_layout_type, dev->cbd_immediate); } + args->ndevs = n; + dprintk("%s: ndevs %d\n", __func__, args->ndevs); + return 0; +err: + kfree(args->devs); out: + args->devs = NULL; + args->ndevs = 0; dprintk("%s: status %d ndevs %d\n", __func__, ntohl(status), args->ndevs); 
return status; -err: - kfree(args->devs); - goto out; } static __be32 decode_sessionid(struct xdr_stream *xdr, -- Gitee From a3877f4a4a470cf50728ee56132d12cdbe68cd18 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 12 Apr 2022 15:55:48 +0800 Subject: [PATCH 149/340] NFSv4 remove zero number of fs_locations entries error check stable inclusion from linux-4.19.230 commit 152f7db416c4bfcc8fc01e55cae60f63489580fa -------------------------------- [ Upstream commit 90e12a3191040bd3854d3e236c35921e4e92a044 ] Remove the check for the zero length fs_locations reply in the xdr decoding, and instead check for that in the migration code. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/nfs/nfs4state.c | 3 +++ fs/nfs/nfs4xdr.c | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 5ff516104699..f53f8f17629b 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2064,6 +2064,9 @@ static int nfs4_try_migration(struct nfs_server *server, struct rpc_cred *cred) } result = -NFS4ERR_NXIO; + if (!locations->nlocations) + goto out; + if (!(locations->fattr.valid & NFS_ATTR_FATTR_V4_LOCATIONS)) { dprintk("<-- %s: No fs_locations data, migration skipped\n", __func__); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index 982fac53e0c6..e8917fc393a4 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3753,8 +3753,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st if (unlikely(!p)) goto out_overflow; n = be32_to_cpup(p); - if (n <= 0) - goto out_eio; for (res->nlocations = 0; res->nlocations < n; res->nlocations++) { u32 m; struct nfs4_fs_location *loc; -- Gitee From 4c895d112ed1b4ba14887931cd63118e84aaac44 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 12 Apr 2022 15:55:49 +0800 Subject: [PATCH 150/340] NFSv4 expose nfs_parse_server_name function stable inclusion from linux-4.19.230 commit 42dc3cf3174ec714732825638947e26923d9ea2a -------------------------------- [ Upstream commit f5b27cc6761e27ee6387a24df1a99ca77b360fea ] Make nfs_parse_server_name available outside of nfs4namespace.c. 
Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/nfs/nfs4_fs.h | 3 ++- fs/nfs/nfs4namespace.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 4f3b5add052b..d22176d87448 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -277,7 +277,8 @@ struct vfsmount *nfs4_submount(struct nfs_server *, struct dentry *, struct nfs_fh *, struct nfs_fattr *); int nfs4_replace_transport(struct nfs_server *server, const struct nfs4_fs_locations *locations); - +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, + size_t salen, struct net *net); /* nfs4proc.c */ extern int nfs4_handle_exception(struct nfs_server *, int, struct nfs4_exception *); extern int nfs4_async_handle_error(struct rpc_task *task, diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 24f06dcc2b08..936c412be28e 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -121,8 +121,8 @@ static int nfs4_validate_fspath(struct dentry *dentry, return 0; } -static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen, struct net *net) +size_t nfs_parse_server_name(char *string, size_t len, struct sockaddr *sa, + size_t salen, struct net *net) { ssize_t ret; -- Gitee From 33d0429868bbffc5401e45102861aaf77df00047 Mon Sep 17 00:00:00 2001 From: ZouMingzhe Date: Tue, 12 Apr 2022 15:55:50 +0800 Subject: [PATCH 151/340] scsi: target: iscsi: Make sure the np under each tpg is unique stable inclusion from linux-4.19.230 commit ded80123b84253ecc6c6cfd1fbbbd99f51c984ec -------------------------------- [ Upstream commit a861790afaa8b6369eee8a88c5d5d73f5799c0c6 ] iscsit_tpg_check_network_portal() has nested for_each loops and is supposed to return true when a match is found. However, the tpg loop will still continue after existing the tpg_np loop. If this tpg_np is not the last the match value will be changed. Break the outer loop after finding a match and make sure the np under each tpg is unique. Link: https://lore.kernel.org/r/20220111054742.19582-1-mingzhe.zou@easystack.cn Signed-off-by: ZouMingzhe Reviewed-by: Mike Christie Signed-off-by: Martin K. Petersen Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/target/iscsi/iscsi_target_tpg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 101d62105c93..f3671ffdf149 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -451,6 +451,9 @@ static bool iscsit_tpg_check_network_portal( break; } spin_unlock(&tpg->tpg_np_lock); + + if (match) + break; } spin_unlock(&tiqn->tiqn_tpg_lock); -- Gitee From 25c90cb2e67c2decff30bf8a4bd9871a6dfa54fc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Apr 2022 15:55:51 +0800 Subject: [PATCH 152/340] bpf: Add kconfig knob for disabling unpriv bpf by default stable inclusion from linux-4.19.230 commit 07e7f7cc619d15645e45d04b1c99550c6d292e9c -------------------------------- commit 08389d888287c3823f80b0216766b71e17f0aba5 upstream. Add a kconfig knob which allows for unprivileged bpf to be disabled by default. If set, the knob sets /proc/sys/kernel/unprivileged_bpf_disabled to value of 2. This still allows a transition of 2 -> {0,1} through an admin. 
Similarly, this also still keeps 1 -> {1} behavior intact, so that once set to permanently disabled, it cannot be undone aside from a reboot. We've also added extra2 with max of 2 for the procfs handler, so that an admin still has a chance to toggle between 0 <-> 2. Either way, as an additional alternative, applications can make use of CAP_BPF that we added a while ago. Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/74ec548079189e4e4dffaeb42b8987bb3c852eee.1620765074.git.daniel@iogearbox.net [fllinden@amazon.com: backported to 4.19] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- Documentation/sysctl/kernel.txt | 21 +++++++++++++++++++++ init/Kconfig | 10 ++++++++++ kernel/bpf/syscall.c | 3 ++- kernel/sysctl.c | 29 +++++++++++++++++++++++++---- 4 files changed, 58 insertions(+), 5 deletions(-) diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 7e33d039ea17..1fa03508ff5a 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -95,6 +95,7 @@ show up in /proc/sys/kernel: - sysctl_writes_strict - tainted - threads-max +- unprivileged_bpf_disabled - unknown_nmi_panic - watchdog - watchdog_thresh @@ -1072,6 +1073,26 @@ available RAM pages threads-max is reduced accordingly. ============================================================== +unprivileged_bpf_disabled: + +Writing 1 to this entry will disable unprivileged calls to bpf(); +once disabled, calling bpf() without CAP_SYS_ADMIN will return +-EPERM. Once set to 1, this can't be cleared from the running kernel +anymore. + +Writing 2 to this entry will also disable unprivileged calls to bpf(), +however, an admin can still change this setting later on, if needed, by +writing 0 or 1 to this entry. + +If BPF_UNPRIV_DEFAULT_OFF is enabled in the kernel config, then this +entry will default to 2 instead of 0. + + 0 - Unprivileged calls to bpf() are enabled + 1 - Unprivileged calls to bpf() are disabled without recovery + 2 - Unprivileged calls to bpf() are disabled + +============================================================== + unknown_nmi_panic: The value in this file affects behavior of handling NMI. When the diff --git a/init/Kconfig b/init/Kconfig index 1a0b15c5a82b..cef0e5fdc901 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1539,6 +1539,16 @@ config BPF_JIT_ALWAYS_ON Enables BPF JIT and removes BPF interpreter to avoid speculative execution of BPF instructions by the interpreter +config BPF_UNPRIV_DEFAULT_OFF + bool "Disable unprivileged BPF by default" + depends on BPF_SYSCALL + help + Disables unprivileged BPF by default by setting the corresponding + /proc/sys/kernel/unprivileged_bpf_disabled knob to 2. An admin can + still reenable it by setting it to 0 later on, or permanently + disable it by setting it to 1 (from which no other transition to + 0 is possible anymore). + config USERFAULTFD bool "Enable userfaultfd() system call" select ANON_INODES diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 30253fb4254e..b11852ea1822 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -48,7 +48,8 @@ static DEFINE_SPINLOCK(prog_idr_lock); static DEFINE_IDR(map_idr); static DEFINE_SPINLOCK(map_idr_lock); -int sysctl_unprivileged_bpf_disabled __read_mostly; +int sysctl_unprivileged_bpf_disabled __read_mostly = + IS_BUILTIN(CONFIG_BPF_UNPRIV_DEFAULT_OFF) ? 
2 : 0; static const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _ops) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 5445dead9911..d2eebbcdff52 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -257,6 +257,28 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write, #endif +#ifdef CONFIG_BPF_SYSCALL +static int bpf_unpriv_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + int ret, unpriv_enable = *(int *)table->data; + bool locked_state = unpriv_enable == 1; + struct ctl_table tmp = *table; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + tmp.data = &unpriv_enable; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret) { + if (locked_state && unpriv_enable != 1) + return -EPERM; + *(int *)table->data = unpriv_enable; + } + return ret; +} +#endif + static struct ctl_table kern_table[]; static struct ctl_table vm_table[]; static struct ctl_table fs_table[]; @@ -1247,10 +1269,9 @@ static struct ctl_table kern_table[] = { .data = &sysctl_unprivileged_bpf_disabled, .maxlen = sizeof(sysctl_unprivileged_bpf_disabled), .mode = 0644, - /* only handle a transition from default "0" to "1" */ - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &one, + .proc_handler = bpf_unpriv_handler, + .extra1 = &zero, + .extra2 = &two, }, #endif #if defined(CONFIG_TREE_RCU) || defined(CONFIG_PREEMPT_RCU) -- Gitee From ab97b84dab5098364fef19fa61a016ba89d5be66 Mon Sep 17 00:00:00 2001 From: Mahesh Bandewar Date: Tue, 12 Apr 2022 15:55:52 +0800 Subject: [PATCH 153/340] bonding: pair enable_port with slave_arr_updates stable inclusion from linux-4.19.230 commit c92b23d934f008aea3ec3239d951f6c4cc0c4422 -------------------------------- [ Upstream commit 23de0d7b6f0e3f9a6283a882594c479949da1120 ] When 803.2ad mode enables a participating port, it should update the slave-array. I have observed that the member links are participating and are part of the active aggregator while the traffic is egressing via only one member link (in a case where two links are participating). Via kprobes I discovered that slave-arr has only one link added while the other participating link wasn't part of the slave-arr. I couldn't see what caused that situation but the simple code-walk through provided me hints that the enable_port wasn't always associated with the slave-array update. 
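One way to read the fix: enabling a participating port and requesting a transmit slave-array rebuild belong together. A hypothetical helper built on the functions visible in the diff below would make the pairing explicit:

  /* Sketch only: struct port and __enable_port() are the bonding
   * driver's own. Rebuilding the slave array whenever a port is
   * enabled keeps the transmit path in sync with the ports that are
   * actually allowed to send.
   */
  static void ad_enable_port_and_refresh(struct port *port,
                                         bool *update_slave_arr)
  {
          __enable_port(port);
          *update_slave_arr = true;
  }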
Fixes: ee6377147409 ("bonding: Simplify the xmit function for modes that use xmit_hash") Signed-off-by: Mahesh Bandewar Acked-by: Jay Vosburgh Link: https://lore.kernel.org/r/20220207222901.1795287-1-maheshb@google.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/net/bonding/bond_3ad.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 93dfcef8afc4..035923876c61 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -1012,8 +1012,8 @@ static void ad_mux_machine(struct port *port, bool *update_slave_arr) if (port->aggregator && port->aggregator->is_active && !__port_is_enabled(port)) { - __enable_port(port); + *update_slave_arr = true; } } break; @@ -1760,6 +1760,7 @@ static void ad_agg_selection_logic(struct aggregator *agg, port = port->next_port_in_aggregator) { __enable_port(port); } + *update_slave_arr = true; } } -- Gitee From a7f727331bad16bf5b30724cf22b1ea1a275c31e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:55:53 +0800 Subject: [PATCH 154/340] ipmr,ip6mr: acquire RTNL before calling ip[6]mr_free_table() on failure path stable inclusion from linux-4.19.230 commit 12b6703e9546902c56b4b9048b893ad49d62bdd4 -------------------------------- [ Upstream commit 5611a00697c8ecc5aad04392bea629e9d6a20463 ] ip[6]mr_free_table() can only be called under RTNL lock. RTNL: assertion failed at net/core/dev.c (10367) WARNING: CPU: 1 PID: 5890 at net/core/dev.c:10367 unregister_netdevice_many+0x1246/0x1850 net/core/dev.c:10367 Modules linked in: CPU: 1 PID: 5890 Comm: syz-executor.2 Not tainted 5.16.0-syzkaller-11627-g422ee58dc0ef #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:unregister_netdevice_many+0x1246/0x1850 net/core/dev.c:10367 Code: 0f 85 9b ee ff ff e8 69 07 4b fa ba 7f 28 00 00 48 c7 c6 00 90 ae 8a 48 c7 c7 40 90 ae 8a c6 05 6d b1 51 06 01 e8 8c 90 d8 01 <0f> 0b e9 70 ee ff ff e8 3e 07 4b fa 4c 89 e7 e8 86 2a 59 fa e9 ee RSP: 0018:ffffc900046ff6e0 EFLAGS: 00010286 RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000 RDX: ffff888050f51d00 RSI: ffffffff815fa008 RDI: fffff520008dfece RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff815f3d6e R11: 0000000000000000 R12: 00000000fffffff4 R13: dffffc0000000000 R14: ffffc900046ff750 R15: ffff88807b7dc000 FS: 00007f4ab736e700(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fee0b4f8990 CR3: 000000001e7d2000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: mroute_clean_tables+0x244/0xb40 net/ipv6/ip6mr.c:1509 ip6mr_free_table net/ipv6/ip6mr.c:389 [inline] ip6mr_rules_init net/ipv6/ip6mr.c:246 [inline] ip6mr_net_init net/ipv6/ip6mr.c:1306 [inline] ip6mr_net_init+0x3f0/0x4e0 net/ipv6/ip6mr.c:1298 ops_init+0xaf/0x470 net/core/net_namespace.c:140 setup_net+0x54f/0xbb0 net/core/net_namespace.c:331 copy_net_ns+0x318/0x760 net/core/net_namespace.c:475 create_new_namespaces+0x3f6/0xb20 kernel/nsproxy.c:110 copy_namespaces+0x391/0x450 kernel/nsproxy.c:178 copy_process+0x2e0c/0x7300 kernel/fork.c:2167 kernel_clone+0xe7/0xab0 kernel/fork.c:2555 __do_sys_clone+0xc8/0x110 kernel/fork.c:2672 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 
arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f4ab89f9059 Code: Unable to access opcode bytes at RIP 0x7f4ab89f902f. RSP: 002b:00007f4ab736e118 EFLAGS: 00000206 ORIG_RAX: 0000000000000038 RAX: ffffffffffffffda RBX: 00007f4ab8b0bf60 RCX: 00007f4ab89f9059 RDX: 0000000020000280 RSI: 0000000020000270 RDI: 0000000040200000 RBP: 00007f4ab8a5308d R08: 0000000020000300 R09: 0000000020000300 R10: 00000000200002c0 R11: 0000000000000206 R12: 0000000000000000 R13: 00007ffc3977cc1f R14: 00007f4ab736e300 R15: 0000000000022000 Fixes: f243e5a7859a ("ipmr,ip6mr: call ip6mr_free_table() on failure path") Signed-off-by: Eric Dumazet Cc: Cong Wang Reported-by: syzbot Link: https://lore.kernel.org/r/20220208053451.2885398-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv4/ipmr.c | 2 ++ net/ipv6/ip6mr.c | 2 ++ 2 files changed, 4 insertions(+) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d235478d9ca3..2085af224a41 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -265,7 +265,9 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; err2: + rtnl_lock(); ipmr_free_table(mrt); + rtnl_unlock(); err1: fib_rules_unregister(ops); return err; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 9a8923e6a8fa..d392a72cd784 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -251,7 +251,9 @@ static int __net_init ip6mr_rules_init(struct net *net) return 0; err2: + rtnl_lock(); ip6mr_free_table(mrt); + rtnl_unlock(); err1: fib_rules_unregister(ops); return err; -- Gitee From ceaed295fb486954206baecd6a7ecca0d8c92a05 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 12 Apr 2022 15:55:54 +0800 Subject: [PATCH 155/340] net: do not keep the dst cache when uncloning an skb dst and its metadata stable inclusion from linux-4.19.230 commit 040e92ea3d7d6f27c1b71d6502e35c54a0939cb7 -------------------------------- [ Upstream commit cfc56f85e72f5b9c5c5be26dc2b16518d36a7868 ] When uncloning an skb dst and its associated metadata a new dst+metadata is allocated and the tunnel information from the old metadata is copied over there. The issue is the tunnel metadata has references to cached dst, which are copied along the way. When a dst+metadata refcount drops to 0 the metadata is freed including the cached dst entries. As they are also referenced in the initial dst+metadata, this ends up in UaFs. In practice the above did not happen because of another issue, the dst+metadata was never freed because its refcount never dropped to 0 (this will be fixed in a subsequent patch). Fix this by initializing the dst cache after copying the tunnel information from the old metadata to also unshare the dst cache. Fixes: d71785ffc7e7 ("net: add dst_cache to ovs vxlan lwtunnel") Cc: Paolo Abeni Reported-by: Vlad Buslov Tested-by: Vlad Buslov Signed-off-by: Antoine Tenart Acked-by: Paolo Abeni Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- include/net/dst_metadata.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 14efa0ded75d..b997e0c1e362 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -123,6 +123,19 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) memcpy(&new_md->u.tun_info, &md_dst->u.tun_info, sizeof(struct ip_tunnel_info) + md_size); +#ifdef CONFIG_DST_CACHE + /* Unclone the dst cache if there is one */ + if (new_md->u.tun_info.dst_cache.cache) { + int ret; + + ret = dst_cache_init(&new_md->u.tun_info.dst_cache, GFP_ATOMIC); + if (ret) { + metadata_dst_free(new_md); + return ERR_PTR(ret); + } + } +#endif + skb_dst_drop(skb); dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); -- Gitee From 0c4296c06307760841438d2672f506071f125553 Mon Sep 17 00:00:00 2001 From: Antoine Tenart Date: Tue, 12 Apr 2022 15:55:55 +0800 Subject: [PATCH 156/340] net: fix a memleak when uncloning an skb dst and its metadata stable inclusion from linux-4.19.230 commit 0be943916d781df2b652793bb2d3ae4f9624c10a -------------------------------- [ Upstream commit 9eeabdf17fa0ab75381045c867c370f4cc75a613 ] When uncloning an skb dst and its associated metadata, a new dst+metadata is allocated and later replaces the old one in the skb. This is helpful to have a non-shared dst+metadata attached to a specific skb. The issue is the uncloned dst+metadata is initialized with a refcount of 1, which is increased to 2 before attaching it to the skb. When tun_dst_unclone returns, the dst+metadata is only referenced from a single place (the skb) while its refcount is 2. Its refcount will never drop to 0 (when the skb is consumed), leading to a memory leak. Fix this by removing the call to dst_hold in tun_dst_unclone, as the dst+metadata refcount is already 1. Fixes: fc4099f17240 ("openvswitch: Fix egress tunnel info.") Cc: Pravin B Shelar Reported-by: Vlad Buslov Tested-by: Vlad Buslov Signed-off-by: Antoine Tenart Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- include/net/dst_metadata.h | 1 - 1 file changed, 1 deletion(-) diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index b997e0c1e362..adab27ba1ecb 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -137,7 +137,6 @@ static inline struct metadata_dst *tun_dst_unclone(struct sk_buff *skb) #endif skb_dst_drop(skb); - dst_hold(&new_md->dst); skb_dst_set(skb, &new_md->dst); return new_md; } -- Gitee From c060bda80a7b459b7b92e12bb49bef0d1c7719ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:55:56 +0800 Subject: [PATCH 157/340] veth: fix races around rq->rx_notify_masked stable inclusion from linux-4.19.230 commit 8f0ea3777590c16222d4251a49e31dd376ff5ac1 -------------------------------- [ Upstream commit 68468d8c4cd4222a4ca1f185ab5a1c14480d078c ] veth being NETIF_F_LLTX enabled, we need to be more careful whenever we read/write rq->rx_notify_masked. 
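The fix below only lets the CPU that wins napi_schedule_prep() set the flag and schedule NAPI, and marks the flag accesses with READ_ONCE()/WRITE_ONCE() so they cannot be torn. A rough userspace analogue of that shape (not part of the patch; the pthread harness and C11 atomics merely stand in for NAPI and the ONCE macros, and every name is invented for the example):

------------------------< veth-flush-analogy.c >------------------------
/* Two "xmit" threads race into flush(); the exchange-based gate plays the
 * role of napi_schedule_prep(), so the "NAPI" is scheduled exactly once.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_bool napi_sched;          /* ~ NAPI_STATE_SCHED */
static atomic_bool rx_notify_masked;    /* ~ rq->rx_notify_masked */
static atomic_int  nr_scheduled;

static bool schedule_prep(void)         /* ~ napi_schedule_prep(): one winner */
{
        return !atomic_exchange(&napi_sched, true);
}

static void flush(void)                 /* ~ the fixed __veth_xdp_flush() */
{
        if (!atomic_load(&rx_notify_masked) && schedule_prep()) {
                atomic_store(&rx_notify_masked, true);
                atomic_fetch_add(&nr_scheduled, 1);     /* ~ __napi_schedule() */
        }
}

static void *xmit_thread(void *arg)
{
        (void)arg;
        for (int i = 0; i < 100000; i++)
                flush();
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        pthread_create(&a, NULL, xmit_thread, NULL);
        pthread_create(&b, NULL, xmit_thread, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        printf("scheduled %d time(s)\n", atomic_load(&nr_scheduled));
        return 0;
}
------------------------------------------------------------------------

Both threads may observe the flag as clear, but only the one that wins the prep gate sets it and schedules; the KCSAN splat below is against the old code, where the same two writers updated the plain bool with no such arbitration.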
BUG: KCSAN: data-race in veth_xmit / veth_xmit write to 0xffff888133d9a9f8 of 1 bytes by task 23552 on cpu 0: __veth_xdp_flush drivers/net/veth.c:269 [inline] veth_xmit+0x307/0x470 drivers/net/veth.c:350 __netdev_start_xmit include/linux/netdevice.h:4683 [inline] netdev_start_xmit include/linux/netdevice.h:4697 [inline] xmit_one+0x105/0x2f0 net/core/dev.c:3473 dev_hard_start_xmit net/core/dev.c:3489 [inline] __dev_queue_xmit+0x86d/0xf90 net/core/dev.c:4116 dev_queue_xmit+0x13/0x20 net/core/dev.c:4149 br_dev_queue_push_xmit+0x3ce/0x430 net/bridge/br_forward.c:53 NF_HOOK include/linux/netfilter.h:307 [inline] br_forward_finish net/bridge/br_forward.c:66 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] __br_forward+0x2e4/0x400 net/bridge/br_forward.c:115 br_flood+0x521/0x5c0 net/bridge/br_forward.c:242 br_dev_xmit+0x8b6/0x960 __netdev_start_xmit include/linux/netdevice.h:4683 [inline] netdev_start_xmit include/linux/netdevice.h:4697 [inline] xmit_one+0x105/0x2f0 net/core/dev.c:3473 dev_hard_start_xmit net/core/dev.c:3489 [inline] __dev_queue_xmit+0x86d/0xf90 net/core/dev.c:4116 dev_queue_xmit+0x13/0x20 net/core/dev.c:4149 neigh_hh_output include/net/neighbour.h:525 [inline] neigh_output include/net/neighbour.h:539 [inline] ip_finish_output2+0x6f8/0xb70 net/ipv4/ip_output.c:228 ip_finish_output+0xfb/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip_output+0xf3/0x1a0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:451 [inline] ip_local_out net/ipv4/ip_output.c:126 [inline] ip_send_skb+0x6e/0xe0 net/ipv4/ip_output.c:1570 udp_send_skb+0x641/0x880 net/ipv4/udp.c:967 udp_sendmsg+0x12ea/0x14c0 net/ipv4/udp.c:1254 inet_sendmsg+0x5f/0x80 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2413 ___sys_sendmsg net/socket.c:2467 [inline] __sys_sendmmsg+0x267/0x4c0 net/socket.c:2553 __do_sys_sendmmsg net/socket.c:2582 [inline] __se_sys_sendmmsg net/socket.c:2579 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae read to 0xffff888133d9a9f8 of 1 bytes by task 23563 on cpu 1: __veth_xdp_flush drivers/net/veth.c:268 [inline] veth_xmit+0x2d6/0x470 drivers/net/veth.c:350 __netdev_start_xmit include/linux/netdevice.h:4683 [inline] netdev_start_xmit include/linux/netdevice.h:4697 [inline] xmit_one+0x105/0x2f0 net/core/dev.c:3473 dev_hard_start_xmit net/core/dev.c:3489 [inline] __dev_queue_xmit+0x86d/0xf90 net/core/dev.c:4116 dev_queue_xmit+0x13/0x20 net/core/dev.c:4149 br_dev_queue_push_xmit+0x3ce/0x430 net/bridge/br_forward.c:53 NF_HOOK include/linux/netfilter.h:307 [inline] br_forward_finish net/bridge/br_forward.c:66 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] __br_forward+0x2e4/0x400 net/bridge/br_forward.c:115 br_flood+0x521/0x5c0 net/bridge/br_forward.c:242 br_dev_xmit+0x8b6/0x960 __netdev_start_xmit include/linux/netdevice.h:4683 [inline] netdev_start_xmit include/linux/netdevice.h:4697 [inline] xmit_one+0x105/0x2f0 net/core/dev.c:3473 dev_hard_start_xmit net/core/dev.c:3489 [inline] __dev_queue_xmit+0x86d/0xf90 net/core/dev.c:4116 dev_queue_xmit+0x13/0x20 net/core/dev.c:4149 neigh_hh_output include/net/neighbour.h:525 [inline] neigh_output include/net/neighbour.h:539 [inline] ip_finish_output2+0x6f8/0xb70 net/ipv4/ip_output.c:228 ip_finish_output+0xfb/0x240 net/ipv4/ip_output.c:316 NF_HOOK_COND 
include/linux/netfilter.h:296 [inline] ip_output+0xf3/0x1a0 net/ipv4/ip_output.c:430 dst_output include/net/dst.h:451 [inline] ip_local_out net/ipv4/ip_output.c:126 [inline] ip_send_skb+0x6e/0xe0 net/ipv4/ip_output.c:1570 udp_send_skb+0x641/0x880 net/ipv4/udp.c:967 udp_sendmsg+0x12ea/0x14c0 net/ipv4/udp.c:1254 inet_sendmsg+0x5f/0x80 net/ipv4/af_inet.c:819 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2413 ___sys_sendmsg net/socket.c:2467 [inline] __sys_sendmmsg+0x267/0x4c0 net/socket.c:2553 __do_sys_sendmmsg net/socket.c:2582 [inline] __se_sys_sendmmsg net/socket.c:2579 [inline] __x64_sys_sendmmsg+0x53/0x60 net/socket.c:2579 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00 -> 0x01 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 23563 Comm: syz-executor.5 Not tainted 5.17.0-rc2-syzkaller-00064-gc36c04c2e132 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 948d4f214fde ("veth: Add driver XDP") Signed-off-by: Eric Dumazet Cc: Toshiaki Makita Reported-by: syzbot Signed-off-by: David S. Miller Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/net/veth.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 749faa6fcd82..ad098169d23f 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -152,9 +152,10 @@ static void __veth_xdp_flush(struct veth_rq *rq) { /* Write ptr_ring before reading rx_notify_masked */ smp_mb(); - if (!rq->rx_notify_masked) { - rq->rx_notify_masked = true; - napi_schedule(&rq->xdp_napi); + if (!READ_ONCE(rq->rx_notify_masked) && + napi_schedule_prep(&rq->xdp_napi)) { + WRITE_ONCE(rq->rx_notify_masked, true); + __napi_schedule(&rq->xdp_napi); } } @@ -623,8 +624,10 @@ static int veth_poll(struct napi_struct *napi, int budget) /* Write rx_notify_masked before reading ptr_ring */ smp_store_mb(rq->rx_notify_masked, false); if (unlikely(!__ptr_ring_empty(&rq->xdp_ring))) { - rq->rx_notify_masked = true; - napi_schedule(&rq->xdp_napi); + if (napi_schedule_prep(&rq->xdp_napi)) { + WRITE_ONCE(rq->rx_notify_masked, true); + __napi_schedule(&rq->xdp_napi); + } } } -- Gitee From b3195d2342748d56d5110a82ea0b6cced72c3d66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?TATSUKAWA=20KOSUKE=20=28=E7=AB=8B=E5=B7=9D=20=E6=B1=9F?= =?UTF-8?q?=E4=BB=8B=29?= Date: Tue, 12 Apr 2022 15:55:57 +0800 Subject: [PATCH 158/340] n_tty: wake up poll(POLLRDNORM) on receiving data stable inclusion from linux-4.19.230 commit 031ec2d7fabb5c01601490671666c55cf92e1b98 -------------------------------- commit c816b2e65b0e86b95011418cad334f0524fc33b8 upstream. The poll man page says POLLRDNORM is equivalent to POLLIN when used as an event. $ man poll POLLRDNORM Equivalent to POLLIN. However, in n_tty driver, POLLRDNORM does not return until timeout even if there is terminal input, whereas POLLIN returns. The following test program works until kernel-3.17, but the test stops in poll() after commit 57087d515441 ("tty: Fix spurious poll() wakeups"). [Steps to run test program] $ cc -o test-pollrdnorm test-pollrdnorm.c $ ./test-pollrdnorm foo <-- Type in something from the terminal followed by [RET]. The string should be echoed back. 
------------------------< test-pollrdnorm.c >------------------------ #include #include #include #include void main(void) { int n; unsigned char buf[8]; struct pollfd fds[1] = {{ 0, POLLRDNORM, 0 }}; n = poll(fds, 1, -1); if (n < 0) perror("poll"); n = read(0, buf, 8); if (n < 0) perror("read"); if (n > 0) write(1, buf, n); } ------------------------------------------------------------------------ The attached patch fixes this problem. Many calls to wake_up_interruptible_poll() in the kernel source code already specify "POLLIN | POLLRDNORM". Fixes: 57087d515441 ("tty: Fix spurious poll() wakeups") Cc: stable@vger.kernel.org Signed-off-by: Kosuke Tatsukawa Link: https://lore.kernel.org/r/TYCPR01MB81901C0F932203D30E452B3EA5209@TYCPR01MB8190.jpnprd01.prod.outlook.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/tty/n_tty.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/tty/n_tty.c b/drivers/tty/n_tty.c index 9369f5d2fdf5..a8d7ab7ee057 100644 --- a/drivers/tty/n_tty.c +++ b/drivers/tty/n_tty.c @@ -1375,7 +1375,7 @@ n_tty_receive_char_special(struct tty_struct *tty, unsigned char c) put_tty_queue(c, ldata); smp_store_release(&ldata->canon_head, ldata->read_head); kill_fasync(&tty->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tty->read_wait, EPOLLIN); + wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM); return 0; } } @@ -1656,7 +1656,7 @@ static void __receive_buf(struct tty_struct *tty, const unsigned char *cp, if (read_cnt(ldata)) { kill_fasync(&tty->fasync, SIGIO, POLL_IN); - wake_up_interruptible_poll(&tty->read_wait, EPOLLIN); + wake_up_interruptible_poll(&tty->read_wait, EPOLLIN | EPOLLRDNORM); } } -- Gitee From e5d58f5ee731b2ff4e5c801cb875c804c63593e4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 12 Apr 2022 15:55:58 +0800 Subject: [PATCH 159/340] seccomp: Invalidate seccomp mode to catch death failures stable inclusion from linux-4.19.230 commit 255264d81da6edaf4cd4fab836d1ef3ba09af6aa -------------------------------- commit 495ac3069a6235bfdf516812a2a9b256671bbdf9 upstream. If seccomp tries to kill a process, it should never see that process again. To enforce this proactively, switch the mode to something impossible. If encountered: WARN, reject all syscalls, and attempt to kill the process again even harder. Cc: Andy Lutomirski Cc: Will Drewry Fixes: 8112c4f140fa ("seccomp: remove 2-phase API") Cc: stable@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- kernel/seccomp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/kernel/seccomp.c b/kernel/seccomp.c index fd023ac24e10..369b63ac7cf9 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -28,6 +28,9 @@ #include #include +/* Not exposed in headers: strictly internal use only. 
*/ +#define SECCOMP_MODE_DEAD (SECCOMP_MODE_FILTER + 1) + #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include #endif @@ -629,6 +632,7 @@ static void __secure_computing_strict(int this_syscall) #ifdef SECCOMP_DEBUG dump_stack(); #endif + current->seccomp.mode = SECCOMP_MODE_DEAD; seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true); do_exit(SIGKILL); } @@ -743,6 +747,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd, case SECCOMP_RET_KILL_THREAD: case SECCOMP_RET_KILL_PROCESS: default: + current->seccomp.mode = SECCOMP_MODE_DEAD; seccomp_log(this_syscall, SIGSYS, action, true); /* Dump core only if this is the last remaining thread. */ if (action == SECCOMP_RET_KILL_PROCESS || @@ -793,6 +798,11 @@ int __secure_computing(const struct seccomp_data *sd) return 0; case SECCOMP_MODE_FILTER: return __seccomp_filter(this_syscall, sd, false); + /* Surviving SECCOMP_RET_KILL_* must be proactively impossible. */ + case SECCOMP_MODE_DEAD: + WARN_ON_ONCE(1); + do_exit(SIGKILL); + return -1; default: BUG(); } -- Gitee From 90dd4e1f09a0734c692d588d6c34045bfd509a35 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Tue, 12 Apr 2022 15:55:59 +0800 Subject: [PATCH 160/340] perf: Fix list corruption in perf_cgroup_switch() stable inclusion from linux-4.19.230 commit 30d9f3cbe47e1018ddc8069ac5b5c9e66fbdf727 -------------------------------- commit 5f4e5ce638e6a490b976ade4a40017b40abb2da0 upstream. There's list corruption on cgrp_cpuctx_list. This happens on the following path: perf_cgroup_switch: list_for_each_entry(cgrp_cpuctx_list) cpu_ctx_sched_in ctx_sched_in ctx_pinned_sched_in merge_sched_in perf_cgroup_event_disable: remove the event from the list Use list_for_each_entry_safe() to allow removing an entry during iteration. Fixes: 058fe1c0440e ("perf/core: Make cgroup switch visit only cpuctxs with cgroup events") Signed-off-by: Song Liu Reviewed-by: Rik van Riel Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20220204004057.2961252-1-song@kernel.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- kernel/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 00b22c820d1f..deba52307349 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -798,7 +798,7 @@ static DEFINE_PER_CPU(struct list_head, cgrp_cpuctx_list); */ static void perf_cgroup_switch(struct task_struct *task, int mode) { - struct perf_cpu_context *cpuctx; + struct perf_cpu_context *cpuctx, *tmp; struct list_head *list; unsigned long flags; @@ -809,7 +809,7 @@ static void perf_cgroup_switch(struct task_struct *task, int mode) local_irq_save(flags); list = this_cpu_ptr(&cgrp_cpuctx_list); - list_for_each_entry(cpuctx, list, cgrp_cpuctx_entry) { + list_for_each_entry_safe(cpuctx, tmp, list, cgrp_cpuctx_entry) { WARN_ON_ONCE(cpuctx->ctx.nr_cgroups == 0); perf_ctx_lock(cpuctx, cpuctx->task_ctx); -- Gitee From fa03a2a696857c4eb819963f546bc07b2a480962 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Tue, 12 Apr 2022 15:56:00 +0800 Subject: [PATCH 161/340] serial: parisc: GSC: fix build when IOSAPIC is not set stable inclusion from linux-4.19.231 commit 592b1b5ad3ee6cce1381111cfd69c6dc868050a3 -------------------------------- commit 6e8793674bb0d1135ca0e5c9f7e16fecbf815926 upstream. 
There is a build error when using a kernel .config file from 'kernel test robot' for a different build problem: hppa64-linux-ld: drivers/tty/serial/8250/8250_gsc.o: in function `.LC3': (.data.rel.ro+0x18): undefined reference to `iosapic_serial_irq' when: CONFIG_GSC=y CONFIG_SERIO_GSCPS2=y CONFIG_SERIAL_8250_GSC=y CONFIG_PCI is not set and hence PCI_LBA is not set. IOSAPIC depends on PCI_LBA, so IOSAPIC is not set/enabled. Make the use of iosapic_serial_irq() conditional to fix the build error. Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Cc: Greg Kroah-Hartman Cc: linux-serial@vger.kernel.org Cc: Jiri Slaby Cc: Johan Hovold Suggested-by: Helge Deller Signed-off-by: Helge Deller Cc: stable@vger.kernel.org Signed-off-by: Helge Deller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/tty/serial/8250/8250_gsc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/serial/8250/8250_gsc.c b/drivers/tty/serial/8250/8250_gsc.c index 0809ae2aa9b1..51cc985216ff 100644 --- a/drivers/tty/serial/8250/8250_gsc.c +++ b/drivers/tty/serial/8250/8250_gsc.c @@ -26,7 +26,7 @@ static int __init serial_init_chip(struct parisc_device *dev) unsigned long address; int err; -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && defined(CONFIG_IOSAPIC) if (!dev->irq && (dev->id.sversion == 0xad)) dev->irq = iosapic_serial_irq(dev); #endif -- Gitee From 976db2f4b02c3205c99040cc49586e6d10e74463 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 12 Apr 2022 15:56:01 +0800 Subject: [PATCH 162/340] vfs: make freeze_super abort when sync_filesystem returns error stable inclusion from linux-4.19.231 commit d5c33270b8b2c274eb8df7b92d166166102528c6 -------------------------------- [ Upstream commit 2719c7160dcfaae1f73a1c0c210ad3281c19022e ] If we fail to synchronize the filesystem while preparing to freeze the fs, abort the freeze. Signed-off-by: Darrick J. 
Wong Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/super.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/fs/super.c b/fs/super.c index 9fb4553c46e6..8dc26e23f1a3 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1404,11 +1404,9 @@ static void lockdep_sb_freeze_acquire(struct super_block *sb) percpu_rwsem_acquire(sb->s_writers.rw_sem + level, 0, _THIS_IP_); } -static void sb_freeze_unlock(struct super_block *sb) +static void sb_freeze_unlock(struct super_block *sb, int level) { - int level; - - for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) + for (level--; level >= 0; level--) percpu_up_write(sb->s_writers.rw_sem + level); } @@ -1479,7 +1477,14 @@ int freeze_super(struct super_block *sb) sb_wait_write(sb, SB_FREEZE_PAGEFAULT); /* All writers are done so after syncing there won't be dirty data */ - sync_filesystem(sb); + ret = sync_filesystem(sb); + if (ret) { + sb->s_writers.frozen = SB_UNFROZEN; + sb_freeze_unlock(sb, SB_FREEZE_PAGEFAULT); + wake_up(&sb->s_writers.wait_unfrozen); + deactivate_locked_super(sb); + return ret; + } /* Now wait for internal filesystem counter */ sb->s_writers.frozen = SB_FREEZE_FS; @@ -1491,7 +1496,7 @@ int freeze_super(struct super_block *sb) printk(KERN_ERR "VFS:Filesystem freeze failed\n"); sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); return ret; @@ -1542,7 +1547,7 @@ static int thaw_super_locked(struct super_block *sb) } sb->s_writers.frozen = SB_UNFROZEN; - sb_freeze_unlock(sb); + sb_freeze_unlock(sb, SB_FREEZE_FS); out: wake_up(&sb->s_writers.wait_unfrozen); deactivate_locked_super(sb); -- Gitee From 93ed5e1c52141d99b9f8557ec02014be7956eabc Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Tue, 12 Apr 2022 15:56:02 +0800 Subject: [PATCH 163/340] quota: make dquot_quota_sync return errors from ->sync_fs stable inclusion from linux-4.19.231 commit a1a41571f06e2b66229738bd0c92c1d57dd793e2 -------------------------------- [ Upstream commit dd5532a4994bfda0386eb2286ec00758cee08444 ] Strangely, dquot_quota_sync ignores the return code from the ->sync_fs call, which means that quotacalls like Q_SYNC never see the error. This doesn't seem right, so fix that. Signed-off-by: Darrick J. Wong Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Acked-by: Christian Brauner Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/quota/dquot.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 1d1d393f4208..ddb379abd919 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -687,9 +687,14 @@ int dquot_quota_sync(struct super_block *sb, int type) /* This is not very clever (and fast) but currently I don't know about * any other simple way of getting quota data to disk and we must get * them there for userspace to be visible... 
*/ - if (sb->s_op->sync_fs) - sb->s_op->sync_fs(sb, 1); - sync_blockdev(sb->s_bdev); + if (sb->s_op->sync_fs) { + ret = sb->s_op->sync_fs(sb, 1); + if (ret) + return ret; + } + ret = sync_blockdev(sb->s_bdev); + if (ret) + return ret; /* * Now when everything is written we can discard the pagecache so -- Gitee From 0173888a936ab577dfc98156c76901f91919e3d1 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 12 Apr 2022 15:56:03 +0800 Subject: [PATCH 164/340] nvme: fix a possible use-after-free in controller reset during load stable inclusion from linux-4.19.231 commit a25e460fbb0340488d119fb2e28fe3f829b7417e -------------------------------- [ Upstream commit 0fa0f99fc84e41057cbdd2efbfe91c6b2f47dd9d ] Unlike .queue_rq, in .submit_async_event drivers may not check the ctrl readiness for AER submission. This may lead to a use-after-free condition that was observed with nvme-tcp. The race condition may happen in the following scenario: 1. driver executes its reset_ctrl_work 2. -> nvme_stop_ctrl - flushes ctrl async_event_work 3. ctrl sends AEN which is received by the host, which in turn schedules AEN handling 4. teardown admin queue (which releases the queue socket) 5. AEN processed, submits another AER, calling the driver to submit 6. driver attempts to send the cmd ==> use-after-free In order to fix that, add ctrl state check to validate the ctrl is actually able to accept the AER submission. This addresses the above race in controller resets because the driver during teardown should: 1. change ctrl state to RESETTING 2. flush async_event_work (as well as other async work elements) So after 1,2, any other AER command will find the ctrl state to be RESETTING and bail out without submitting the AER. Signed-off-by: Sagi Grimberg Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/nvme/host/core.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index feba1a34c15e..164ec45dac84 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3683,7 +3683,14 @@ static void nvme_async_event_work(struct work_struct *work) container_of(work, struct nvme_ctrl, async_event_work); nvme_aen_uevent(ctrl); - ctrl->ops->submit_async_event(ctrl); + + /* + * The transport drivers must guarantee AER submission here is safe by + * flushing ctrl async_event_work after changing the controller state + * from LIVE and before freeing the admin queue. + */ + if (ctrl->state == NVME_CTRL_LIVE) + ctrl->ops->submit_async_event(ctrl); } static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) -- Gitee From 9f4c7283e5e436a211e9e98f87ff0e34f1efb347 Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Tue, 12 Apr 2022 15:56:04 +0800 Subject: [PATCH 165/340] xfrm: Don't accidentally set RTO_ONLINK in decode_session4() stable inclusion from linux-4.19.231 commit bbf7a1a2fc64d89896ba5eba494a40ca151a2675 -------------------------------- commit 23e7b1bfed61e301853b5e35472820d919498278 upstream. Similar to commit 94e2238969e8 ("xfrm4: strip ECN bits from tos field"), clear the ECN bits from iph->tos when setting ->flowi4_tos. This ensures that the last bit of ->flowi4_tos is cleared, so ip_route_output_key_hash() isn't going to restrict the scope of the route lookup. Use ~INET_ECN_MASK instead of IPTOS_RT_MASK, because we have no reason to clear the high order bits. Found by code inspection, compile tested only. 
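The two low-order bits of the tos byte are the ECN field, and bit 0 is also where ip_route_output_key_hash() looks for RTO_ONLINK, which is how a CE-marked packet could silently narrow the lookup to link scope. A throwaway userspace check of the masks involved (constant values copied from include/net/inet_ecn.h and include/net/route.h purely for illustration; this file is not part of the patch):

------------------------< tos-mask-check.c >------------------------
#include <stdio.h>

#define INET_ECN_MASK   0x03                    /* ECN field, bits 0-1 */
#define IPTOS_TOS_MASK  0x1E
#define IPTOS_RT_MASK   (IPTOS_TOS_MASK & ~3)   /* 0x1c */
#define RTO_ONLINK      0x01                    /* forces link scope */

int main(void)
{
        /* example header: DSCP CS5 (0xa0) with the CE codepoint (0x03) set */
        unsigned int tos = 0xa0 | 0x03;

        printf("raw tos            : 0x%02x  RTO_ONLINK bit set: %u\n",
               tos, tos & RTO_ONLINK);
        printf("tos & ~INET_ECN    : 0x%02x  RTO_ONLINK bit set: %u\n",
               tos & ~INET_ECN_MASK, (tos & ~INET_ECN_MASK) & RTO_ONLINK);
        printf("tos & IPTOS_RT_MASK: 0x%02x  (high DSCP bits dropped as well)\n",
               tos & IPTOS_RT_MASK);
        return 0;
}
------------------------------------------------------------------------

Masking with ~INET_ECN_MASK keeps the full DSCP while guaranteeing the RTO_ONLINK bit is clear; IPTOS_RT_MASK would clear it too, but needlessly throws away the high-order DSCP bits, which is why the patch picks the former.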
Fixes: 4da3089f2b58 ("[IPSEC]: Use TOS when doing tunnel lookups") Signed-off-by: Guillaume Nault Signed-off-by: Jakub Kicinski [sudip: manually backport to previous location] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv4/xfrm4_policy.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 1e5e2e4be0b2..e85b5f57d3e9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -17,6 +17,7 @@ #include #include #include +#include static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, int tos, int oif, @@ -126,7 +127,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_proto = iph->protocol; fl4->daddr = reverse ? iph->saddr : iph->daddr; fl4->saddr = reverse ? iph->daddr : iph->saddr; - fl4->flowi4_tos = iph->tos; + fl4->flowi4_tos = iph->tos & ~INET_ECN_MASK; if (!ip_is_fragment(iph)) { switch (iph->protocol) { -- Gitee From b67cb551884fb51136d0bf28dc1716aab90c725b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 12 Apr 2022 15:56:05 +0800 Subject: [PATCH 166/340] taskstats: Cleanup the use of task->exit_code stable inclusion from linux-4.19.231 commit c6055df602b9e9ed9427b7bb9088c75cb5cf6350 -------------------------------- commit 1b5a42d9c85f0e731f01c8d1129001fd8531a8a0 upstream. In the function bacct_add_task the code reading task->exit_code was introduced in commit f3cef7a99469 ("[PATCH] csa: basic accounting over taskstats"), and it is not entirely clear what the taskstats interface is trying to return as only returning the exit_code of the first task in a process doesn't make a lot of sense. As best as I can figure the intent is to return task->exit_code after a task exits. The field is returned with per task fields, so the exit_code of the entire process is not wanted. Only the value of the first task is returned so this is not a useful way to get the per task ptrace stop code. The ordinary case of returning this value is returning after a task exits, which also precludes use for getting a ptrace value. It is common to for the first task of a process to also be the last task of a process so this field may have done something reasonable by accident in testing. Make ac_exitcode a reliable per task value by always returning it for every exited task. Setting ac_exitcode in a sensible mannter makes it possible to continue to provide this value going forward. Cc: Balbir Singh Fixes: f3cef7a99469 ("[PATCH] csa: basic accounting over taskstats") Link: https://lkml.kernel.org/r/20220103213312.9144-5-ebiederm@xmission.com Signed-off-by: "Eric W. 
Biederman" [sudip: adjust context] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- kernel/tsacct.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 370724b45391..8d7c6d3f1daa 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -46,11 +46,10 @@ void bacct_add_tsk(struct user_namespace *user_ns, /* Convert to seconds for btime */ do_div(delta, USEC_PER_SEC); stats->ac_btime = get_seconds() - delta; - if (thread_group_leader(tsk)) { + if (tsk->flags & PF_EXITING) stats->ac_exitcode = tsk->exit_code; - if (tsk->flags & PF_FORKNOEXEC) - stats->ac_flag |= AFORK; - } + if (thread_group_leader(tsk) && (tsk->flags & PF_FORKNOEXEC)) + stats->ac_flag |= AFORK; if (tsk->flags & PF_SUPERPRIV) stats->ac_flag |= ASU; if (tsk->flags & PF_DUMPCORE) -- Gitee From 8f07718445d2d571a85c539a7f04c6b77ee8f5d9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 12 Apr 2022 15:56:06 +0800 Subject: [PATCH 167/340] ping: fix the dif and sdif check in ping_lookup stable inclusion from linux-4.19.231 commit b0e55a57df7deed5f0682b584e4da913db2f6e84 -------------------------------- commit 35a79e64de29e8d57a5989aac57611c0cd29e13e upstream. When 'ping' changes to use PING socket instead of RAW socket by: # sysctl -w net.ipv4.ping_group_range="0 100" There is another regression caused when matching sk_bound_dev_if and dif, RAW socket is using inet_iif() while PING socket lookup is using skb->dev->ifindex, the cmd below fails due to this: # ip link add dummy0 type dummy # ip link set dummy0 up # ip addr add 192.168.111.1/24 dev dummy0 # ping -I dummy0 192.168.111.1 -c1 The issue was also reported on: https://github.com/iputils/iputils/issues/104 But fixed in iputils in a wrong way by not binding to device when destination IP is on device, and it will cause some of kselftests to fail, as Jianlin noticed. This patch is to use inet(6)_iif and inet(6)_sdif to get dif and sdif for PING socket, and keep consistent with RAW socket. Fixes: c319b4d76b9e ("net: ipv4: add IPPROTO_ICMP socket kind") Reported-by: Jianlin Shi Signed-off-by: Xin Long Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv4/ping.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 276442443d32..28d171e6e10b 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -177,16 +177,23 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) struct sock *sk = NULL; struct inet_sock *isk; struct hlist_nulls_node *hnode; - int dif = skb->dev->ifindex; + int dif, sdif; if (skb->protocol == htons(ETH_P_IP)) { + dif = inet_iif(skb); + sdif = inet_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI4, dif = %d\n", (int)ident, &ip_hdr(skb)->daddr, dif); #if IS_ENABLED(CONFIG_IPV6) } else if (skb->protocol == htons(ETH_P_IPV6)) { + dif = inet6_iif(skb); + sdif = inet6_sdif(skb); pr_debug("try to find: num = %d, daddr = %pI6c, dif = %d\n", (int)ident, &ipv6_hdr(skb)->daddr, dif); #endif + } else { + pr_err("ping: protocol(%x) is not supported\n", ntohs(skb->protocol)); + return NULL; } read_lock_bh(&ping_table.lock); @@ -226,7 +233,7 @@ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) } if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && - sk->sk_bound_dev_if != inet_sdif(skb)) + sk->sk_bound_dev_if != sdif) continue; sock_hold(sk); -- Gitee From 452fc6b6c1221c7d2d103f6c81a01aaacefc1b1e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:56:07 +0800 Subject: [PATCH 168/340] drop_monitor: fix data-race in dropmon_net_event / trace_napi_poll_hit stable inclusion from linux-4.19.231 commit e294bc65746b779e4ad50f95ed04926cf72d1454 -------------------------------- commit dcd54265c8bc14bd023815e36e2d5f9d66ee1fee upstream. trace_napi_poll_hit() is reading stat->dev while another thread can write on it from dropmon_net_event() Use READ_ONCE()/WRITE_ONCE() here, RCU rules are properly enforced already, we only have to take care of load/store tearing. 
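Concretely, the hunk below has the writer publish NULL with WRITE_ONCE() and the reader take a single READ_ONCE() snapshot of new_stat->dev before comparing it. The same idiom in a self-contained userspace form, with the usual volatile-cast definitions standing in for the kernel macros (simplified; the structures and names here are invented for the example):

------------------------< once-idiom.c >------------------------
#include <stdio.h>

/* classic volatile-cast forms, simplified from the kernel's definitions */
#define READ_ONCE(x)            (*(const volatile typeof(x) *)&(x))
#define WRITE_ONCE(x, val)      (*(volatile typeof(x) *)&(x) = (val))

struct net_device { const char *name; };
struct dm_hw_stat_delta { struct net_device *dev; };

static struct net_device eth0 = { "eth0" };
static struct dm_hw_stat_delta hw_stat = { &eth0 };

/* reader: one marked load, then only the local snapshot is used */
static void trace_hit(const struct net_device *napi_dev)
{
        struct net_device *dev = READ_ONCE(hw_stat.dev);

        if (dev == napi_dev)
                printf("hit on %s\n", dev->name);
}

/* writer: unpublish the device with a marked store */
static void net_event(void)
{
        WRITE_ONCE(hw_stat.dev, NULL);
}

int main(void)
{
        trace_hit(&eth0);       /* prints "hit on eth0" */
        net_event();
        trace_hit(&eth0);       /* snapshot is NULL, nothing is dereferenced */
        return 0;
}
------------------------------------------------------------------------

The marked accesses keep the compiler from tearing or re-reading the pointer, and the single snapshot means the comparison and the later use cannot see two different values, which is all this fix needs since RCU already keeps the object itself alive.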
BUG: KCSAN: data-race in dropmon_net_event / trace_napi_poll_hit write to 0xffff88816f3ab9c0 of 8 bytes by task 20260 on cpu 1: dropmon_net_event+0xb8/0x2b0 net/core/drop_monitor.c:1579 notifier_call_chain kernel/notifier.c:84 [inline] raw_notifier_call_chain+0x53/0xb0 kernel/notifier.c:392 call_netdevice_notifiers_info net/core/dev.c:1919 [inline] call_netdevice_notifiers_extack net/core/dev.c:1931 [inline] call_netdevice_notifiers net/core/dev.c:1945 [inline] unregister_netdevice_many+0x867/0xfb0 net/core/dev.c:10415 ip_tunnel_delete_nets+0x24a/0x280 net/ipv4/ip_tunnel.c:1123 vti_exit_batch_net+0x2a/0x30 net/ipv4/ip_vti.c:515 ops_exit_list net/core/net_namespace.c:173 [inline] cleanup_net+0x4dc/0x8d0 net/core/net_namespace.c:597 process_one_work+0x3f6/0x960 kernel/workqueue.c:2307 worker_thread+0x616/0xa70 kernel/workqueue.c:2454 kthread+0x1bf/0x1e0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 read to 0xffff88816f3ab9c0 of 8 bytes by interrupt on cpu 0: trace_napi_poll_hit+0x89/0x1c0 net/core/drop_monitor.c:292 trace_napi_poll include/trace/events/napi.h:14 [inline] __napi_poll+0x36b/0x3f0 net/core/dev.c:6366 napi_poll net/core/dev.c:6432 [inline] net_rx_action+0x29e/0x650 net/core/dev.c:6519 __do_softirq+0x158/0x2de kernel/softirq.c:558 do_softirq+0xb1/0xf0 kernel/softirq.c:459 __local_bh_enable_ip+0x68/0x70 kernel/softirq.c:383 __raw_spin_unlock_bh include/linux/spinlock_api_smp.h:167 [inline] _raw_spin_unlock_bh+0x33/0x40 kernel/locking/spinlock.c:210 spin_unlock_bh include/linux/spinlock.h:394 [inline] ptr_ring_consume_bh include/linux/ptr_ring.h:367 [inline] wg_packet_decrypt_worker+0x73c/0x780 drivers/net/wireguard/receive.c:506 process_one_work+0x3f6/0x960 kernel/workqueue.c:2307 worker_thread+0x616/0xa70 kernel/workqueue.c:2454 kthread+0x1bf/0x1e0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 value changed: 0xffff88815883e000 -> 0x0000000000000000 Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 26435 Comm: kworker/0:1 Not tainted 5.17.0-rc1-syzkaller #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: wg-crypt-wg2 wg_packet_decrypt_worker Fixes: 4ea7e38696c7 ("dropmon: add ability to detect when hardware dropsrxpackets") Signed-off-by: Eric Dumazet Cc: Neil Horman Reported-by: syzbot Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/core/drop_monitor.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 3978a5e8d261..2ed600012640 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -219,13 +219,17 @@ static void trace_napi_poll_hit(void *ignore, struct napi_struct *napi, rcu_read_lock(); list_for_each_entry_rcu(new_stat, &hw_stats_list, list) { + struct net_device *dev; + /* * only add a note to our monitor buffer if: * 1) this is the dev we received on * 2) its after the last_rx delta * 3) our rx_dropped count has gone up */ - if ((new_stat->dev == napi->dev) && + /* Paired with WRITE_ONCE() in dropmon_net_event() */ + dev = READ_ONCE(new_stat->dev); + if ((dev == napi->dev) && (time_after(jiffies, new_stat->last_rx + dm_hw_check_delta)) && (napi->dev->stats.rx_dropped != new_stat->last_drop_val)) { trace_drop_common(NULL, NULL); @@ -340,7 +344,10 @@ static int dropmon_net_event(struct notifier_block *ev_block, mutex_lock(&trace_state_mutex); list_for_each_entry_safe(new_stat, tmp, &hw_stats_list, list) { if (new_stat->dev == dev) { - new_stat->dev = NULL; + + /* Paired with READ_ONCE() in trace_napi_poll_hit() */ + WRITE_ONCE(new_stat->dev, NULL); + if (trace_state == TRACE_OFF) { list_del_rcu(&new_stat->list); kfree_rcu(new_stat, rcu); -- Gitee From 68f74ceb415961ef2270751a1e9bee990752a9f6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:56:08 +0800 Subject: [PATCH 169/340] bonding: fix data-races around agg_select_timer stable inclusion from linux-4.19.231 commit 4218e6995c19970aa7b32914be6c8e059837cbdf -------------------------------- commit 9ceaf6f76b203682bb6100e14b3d7da4c0bedde8 upstream. syzbot reported that two threads might write over agg_select_timer at the same time. Make agg_select_timer atomic to fix the races. 
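The helper the patch adds, bond_agg_timer_advance(), turns the unlocked "--timer" into a lock-free decrement that can never go below zero and reports expiry exactly once. The same loop transcribed to C11 atomics so it can be compiled and exercised in userspace (the two-thread harness is invented for the example; in the driver the concurrent writer is bond_open() rearming the timer while the state-machine work item decrements it):

------------------------< agg-timer-sketch.c >------------------------
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int agg_select_timer;

/* ~ bond_agg_timer_advance(): true only for the call that reaches zero */
static bool agg_timer_advance(void)
{
        int val, nval;

        for (;;) {
                val = atomic_load(&agg_select_timer);
                if (!val)
                        return false;   /* already expired, nothing to do */
                nval = val - 1;
                if (atomic_compare_exchange_strong(&agg_select_timer,
                                                   &val, nval))
                        return nval == 0;
        }
}

static void *machine(void *arg) /* ~ bond_3ad_state_machine_handler() ticks */
{
        (void)arg;
        for (int i = 0; i < 1000; i++)
                if (agg_timer_advance())
                        printf("aggregator selection runs now\n");
        return NULL;
}

int main(void)
{
        pthread_t a, b;

        atomic_store(&agg_select_timer, 20);    /* ~ initiate_agg_selection() */
        pthread_create(&a, NULL, machine, NULL);
        pthread_create(&b, NULL, machine, NULL);
        pthread_join(a, NULL);
        pthread_join(b, NULL);
        return 0;
}
------------------------------------------------------------------------

However the decrements interleave, the counter never wraps past zero and the "runs now" line is printed exactly once, which the plain u32 field could not guarantee once bond_open() could rewrite it concurrently with the decrement.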
BUG: KCSAN: data-race in bond_3ad_initiate_agg_selection / bond_3ad_state_machine_handler read to 0xffff8881242aea90 of 4 bytes by task 1846 on cpu 1: bond_3ad_state_machine_handler+0x99/0x2810 drivers/net/bonding/bond_3ad.c:2317 process_one_work+0x3f6/0x960 kernel/workqueue.c:2307 worker_thread+0x616/0xa70 kernel/workqueue.c:2454 kthread+0x1bf/0x1e0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 write to 0xffff8881242aea90 of 4 bytes by task 25910 on cpu 0: bond_3ad_initiate_agg_selection+0x18/0x30 drivers/net/bonding/bond_3ad.c:1998 bond_open+0x658/0x6f0 drivers/net/bonding/bond_main.c:3967 __dev_open+0x274/0x3a0 net/core/dev.c:1407 dev_open+0x54/0x190 net/core/dev.c:1443 bond_enslave+0xcef/0x3000 drivers/net/bonding/bond_main.c:1937 do_set_master net/core/rtnetlink.c:2532 [inline] do_setlink+0x94f/0x2500 net/core/rtnetlink.c:2736 __rtnl_newlink net/core/rtnetlink.c:3414 [inline] rtnl_newlink+0xfeb/0x13e0 net/core/rtnetlink.c:3529 rtnetlink_rcv_msg+0x745/0x7e0 net/core/rtnetlink.c:5594 netlink_rcv_skb+0x14e/0x250 net/netlink/af_netlink.c:2494 rtnetlink_rcv+0x18/0x20 net/core/rtnetlink.c:5612 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x602/0x6d0 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x728/0x850 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg net/socket.c:725 [inline] ____sys_sendmsg+0x39a/0x510 net/socket.c:2413 ___sys_sendmsg net/socket.c:2467 [inline] __sys_sendmsg+0x195/0x230 net/socket.c:2496 __do_sys_sendmsg net/socket.c:2505 [inline] __se_sys_sendmsg net/socket.c:2503 [inline] __x64_sys_sendmsg+0x42/0x50 net/socket.c:2503 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x44/0xd0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae value changed: 0x00000050 -> 0x0000004f Reported by Kernel Concurrency Sanitizer on: CPU: 0 PID: 25910 Comm: syz-executor.1 Tainted: G W 5.17.0-rc4-syzkaller-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: Jay Vosburgh Cc: Veaceslav Falico Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/net/bonding/bond_3ad.c | 30 +++++++++++++++++++++++++----- include/net/bond_3ad.h | 2 +- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index 035923876c61..e3f814e83d9c 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -249,7 +249,7 @@ static inline int __check_agg_selection_timer(struct port *port) if (bond == NULL) return 0; - return BOND_AD_INFO(bond).agg_select_timer ? 1 : 0; + return atomic_read(&BOND_AD_INFO(bond).agg_select_timer) ? 1 : 0; } /** @@ -1965,7 +1965,7 @@ static void ad_marker_response_received(struct bond_marker *marker, */ void bond_3ad_initiate_agg_selection(struct bonding *bond, int timeout) { - BOND_AD_INFO(bond).agg_select_timer = timeout; + atomic_set(&BOND_AD_INFO(bond).agg_select_timer, timeout); } /** @@ -2249,6 +2249,28 @@ void bond_3ad_update_ad_actor_settings(struct bonding *bond) spin_unlock_bh(&bond->mode_lock); } +/** + * bond_agg_timer_advance - advance agg_select_timer + * @bond: bonding structure + * + * Return true when agg_select_timer reaches 0. 
+ */ +static bool bond_agg_timer_advance(struct bonding *bond) +{ + int val, nval; + + while (1) { + val = atomic_read(&BOND_AD_INFO(bond).agg_select_timer); + if (!val) + return false; + nval = val - 1; + if (atomic_cmpxchg(&BOND_AD_INFO(bond).agg_select_timer, + val, nval) == val) + break; + } + return nval == 0; +} + /** * bond_3ad_state_machine_handler - handle state machines timeout * @bond: bonding struct to work on @@ -2284,9 +2306,7 @@ void bond_3ad_state_machine_handler(struct work_struct *work) if (!bond_has_slaves(bond)) goto re_arm; - /* check if agg_select_timer timer after initialize is timed out */ - if (BOND_AD_INFO(bond).agg_select_timer && - !(--BOND_AD_INFO(bond).agg_select_timer)) { + if (bond_agg_timer_advance(bond)) { slave = bond_first_slave_rcu(bond); port = slave ? &(SLAVE_AD_INFO(slave)->port) : NULL; diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index fc3111515f5c..732bc3b4606b 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -265,7 +265,7 @@ struct ad_system { struct ad_bond_info { struct ad_system system; /* 802.3ad system structure */ - u32 agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ + atomic_t agg_select_timer; /* Timer to select aggregator after all adapter's hand shakes */ u16 aggregator_identifier; }; -- Gitee From 386778705e50f8b11ae4d1f8d85ca048420b8fb6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Apr 2022 15:56:09 +0800 Subject: [PATCH 170/340] NFS: LOOKUP_DIRECTORY is also ok with symlinks stable inclusion from linux-4.19.231 commit 67552482aee7a139ed957595db8668d28ddc2c42 -------------------------------- commit e0caaf75d443e02e55e146fd75fe2efc8aed5540 upstream. Commit ac795161c936 (NFSv4: Handle case where the lookup of a directory fails) [1], part of Linux since 5.17-rc2, introduced a regression, where a symbolic link on an NFS mount to a directory on another NFS does not resolve(?) the first time it is accessed: Reported-by: Paul Menzel Fixes: ac795161c936 ("NFSv4: Handle case where the lookup of a directory fails") Signed-off-by: Trond Myklebust Tested-by: Donald Buczek Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/nfs/dir.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 72e5ae8cc4ff..2ec602eccb80 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1640,14 +1640,14 @@ int nfs_atomic_open(struct inode *dir, struct dentry *dentry, if (!res) { inode = d_inode(dentry); if ((lookup_flags & LOOKUP_DIRECTORY) && inode && - !S_ISDIR(inode->i_mode)) + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) res = ERR_PTR(-ENOTDIR); else if (inode && S_ISREG(inode->i_mode)) res = ERR_PTR(-EOPENSTALE); } else if (!IS_ERR(res)) { inode = d_inode(res); if ((lookup_flags & LOOKUP_DIRECTORY) && inode && - !S_ISDIR(inode->i_mode)) { + !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) { dput(res); res = ERR_PTR(-ENOTDIR); } else if (inode && S_ISREG(inode->i_mode)) { -- Gitee From e488b6784f338947b60eac630caee5f995f67a16 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 12 Apr 2022 15:56:10 +0800 Subject: [PATCH 171/340] NFS: Do not report writeback errors in nfs_getattr() stable inclusion from linux-4.19.231 commit d2ba21f271eb167ac2b0581598e55222b89f0f32 -------------------------------- commit d19e0183a88306acda07f4a01fedeeffe2a2a06b upstream. 
The result of the writeback, whether it is an ENOSPC or an EIO, or anything else, does not inhibit the NFS client from reporting the correct file timestamps. Fixes: 79566ef018f5 ("NFS: Getattr doesn't require data sync semantics") Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/nfs/inode.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 6d5fedcd30d0..db52430dfbd7 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -787,12 +787,9 @@ int nfs_getattr(const struct path *path, struct kstat *stat, goto out_no_update; /* Flush out writes to the server in order to update c/mtime. */ - if ((request_mask & (STATX_CTIME|STATX_MTIME)) && - S_ISREG(inode->i_mode)) { - err = filemap_write_and_wait(inode->i_mapping); - if (err) - goto out; - } + if ((request_mask & (STATX_CTIME | STATX_MTIME)) && + S_ISREG(inode->i_mode)) + filemap_write_and_wait(inode->i_mapping); /* * We may force a getattr if the user cares about atime. -- Gitee From 6ff4ccebf4a5346a100ef89ae44a23300f0d534b Mon Sep 17 00:00:00 2001 From: Bryan O'Donoghue Date: Tue, 12 Apr 2022 15:56:11 +0800 Subject: [PATCH 172/340] mtd: rawnand: qcom: Fix clock sequencing in qcom_nandc_probe() stable inclusion from linux-4.19.231 commit 891484112f4ffd0b576e88407bbd3653abf3faba -------------------------------- commit 5c23b3f965bc9ee696bf2ed4bdc54d339dd9a455 upstream. Interacting with a NAND chip on an IPQ6018 I found that the qcomsmem NAND partition parser was returning -EPROBE_DEFER waiting for the main smem driver to load. This caused the board to reset. Playing about with the probe() function shows that the problem lies in the core clock being switched off before the nandc_unalloc() routine has completed. If we look at how qcom_nandc_remove() tears down allocated resources we see the expected order is qcom_nandc_unalloc(nandc); clk_disable_unprepare(nandc->aon_clk); clk_disable_unprepare(nandc->core_clk); dma_unmap_resource(&pdev->dev, nandc->base_dma, resource_size(res), DMA_BIDIRECTIONAL, 0); Tweaking probe() to both bring up and tear-down in that order removes the reset if we end up deferring elsewhere. Fixes: c76b78d8ec05 ("mtd: nand: Qualcomm NAND controller driver") Signed-off-by: Bryan O'Donoghue Reviewed-by: Manivannan Sadhasivam Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/20220103030316.58301-2-bryan.odonoghue@linaro.org Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/mtd/nand/raw/qcom_nandc.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/mtd/nand/raw/qcom_nandc.c b/drivers/mtd/nand/raw/qcom_nandc.c index 07d8750313fd..0e4be6814fc7 100644 --- a/drivers/mtd/nand/raw/qcom_nandc.c +++ b/drivers/mtd/nand/raw/qcom_nandc.c @@ -10,7 +10,6 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
*/ - #include #include #include @@ -2957,10 +2956,6 @@ static int qcom_nandc_probe(struct platform_device *pdev) if (!nandc->base_dma) return -ENXIO; - ret = qcom_nandc_alloc(nandc); - if (ret) - goto err_nandc_alloc; - ret = clk_prepare_enable(nandc->core_clk); if (ret) goto err_core_clk; @@ -2969,6 +2964,10 @@ static int qcom_nandc_probe(struct platform_device *pdev) if (ret) goto err_aon_clk; + ret = qcom_nandc_alloc(nandc); + if (ret) + goto err_nandc_alloc; + ret = qcom_nandc_setup(nandc); if (ret) goto err_setup; @@ -2980,15 +2979,14 @@ static int qcom_nandc_probe(struct platform_device *pdev) return 0; err_setup: + qcom_nandc_unalloc(nandc); +err_nandc_alloc: clk_disable_unprepare(nandc->aon_clk); err_aon_clk: clk_disable_unprepare(nandc->core_clk); err_core_clk: - qcom_nandc_unalloc(nandc); -err_nandc_alloc: dma_unmap_resource(dev, res->start, resource_size(res), DMA_BIDIRECTIONAL, 0); - return ret; } -- Gitee From b4a48e80b131d001c3981eae7c05c1dbcfec720d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:56:12 +0800 Subject: [PATCH 173/340] net: sched: limit TC_ACT_REPEAT loops stable inclusion from linux-4.19.231 commit f63c9fa36bd7548fd07792127057cccb68a7e274 -------------------------------- commit 5740d068909676d4bdb5c9c00c37a83df7728909 upstream. We have been living dangerously, at the mercy of malicious users, abusing TC_ACT_REPEAT, as shown by this syzpot report [1]. Add an arbitrary limit (32) to the number of times an action can return TC_ACT_REPEAT. v2: switch the limit to 32 instead of 10. Use net_warn_ratelimited() instead of pr_err_once(). [1] (C repro available on demand) rcu: INFO: rcu_preempt self-detected stall on CPU rcu: 1-...!: (10500 ticks this GP) idle=021/1/0x4000000000000000 softirq=5592/5592 fqs=0 (t=10502 jiffies g=5305 q=190) rcu: rcu_preempt kthread timer wakeup didn't happen for 10502 jiffies! g5305 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 rcu: Possible timer handling issue on cpu=0 timer-softirq=3527 rcu: rcu_preempt kthread starved for 10505 jiffies! g5305 f0x0 RCU_GP_WAIT_FQS(5) ->state=0x402 ->cpu=0 rcu: Unless rcu_preempt kthread gets sufficient CPU time, OOM is now expected behavior. 
rcu: RCU grace-period kthread stack dump: task:rcu_preempt state:I stack:29344 pid: 14 ppid: 2 flags:0x00004000 Call Trace: context_switch kernel/sched/core.c:4986 [inline] __schedule+0xab2/0x4db0 kernel/sched/core.c:6295 schedule+0xd2/0x260 kernel/sched/core.c:6368 schedule_timeout+0x14a/0x2a0 kernel/time/timer.c:1881 rcu_gp_fqs_loop+0x186/0x810 kernel/rcu/tree.c:1963 rcu_gp_kthread+0x1de/0x320 kernel/rcu/tree.c:2136 kthread+0x2e9/0x3a0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 rcu: Stack dump where RCU GP kthread last ran: Sending NMI from CPU 1 to CPUs 0: NMI backtrace for cpu 0 CPU: 0 PID: 3646 Comm: syz-executor358 Not tainted 5.17.0-rc3-syzkaller-00149-gbf8e59fd315f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:rep_nop arch/x86/include/asm/vdso/processor.h:13 [inline] RIP: 0010:cpu_relax arch/x86/include/asm/vdso/processor.h:18 [inline] RIP: 0010:pv_wait_head_or_lock kernel/locking/qspinlock_paravirt.h:437 [inline] RIP: 0010:__pv_queued_spin_lock_slowpath+0x3b8/0xb40 kernel/locking/qspinlock.c:508 Code: 48 89 eb c6 45 01 01 41 bc 00 80 00 00 48 c1 e9 03 83 e3 07 41 be 01 00 00 00 48 b8 00 00 00 00 00 fc ff df 4c 8d 2c 01 eb 0c 90 41 83 ec 01 0f 84 72 04 00 00 41 0f b6 45 00 38 d8 7f 08 84 RSP: 0018:ffffc9000283f1b0 EFLAGS: 00000206 RAX: 0000000000000003 RBX: 0000000000000000 RCX: 1ffff1100fc0071e RDX: 0000000000000001 RSI: 0000000000000201 RDI: 0000000000000000 RBP: ffff88807e0038f0 R08: 0000000000000001 R09: ffffffff8ffbf9ff R10: 0000000000000001 R11: 0000000000000001 R12: 0000000000004c1e R13: ffffed100fc0071e R14: 0000000000000001 R15: ffff8880b9c3aa80 FS: 00005555562bf300(0000) GS:ffff8880b9c00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007ffdbfef12b8 CR3: 00000000723c2000 CR4: 00000000003506f0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: pv_queued_spin_lock_slowpath arch/x86/include/asm/paravirt.h:591 [inline] queued_spin_lock_slowpath arch/x86/include/asm/qspinlock.h:51 [inline] queued_spin_lock include/asm-generic/qspinlock.h:85 [inline] do_raw_spin_lock+0x200/0x2b0 kernel/locking/spinlock_debug.c:115 spin_lock_bh include/linux/spinlock.h:354 [inline] sch_tree_lock include/net/sch_generic.h:610 [inline] sch_tree_lock include/net/sch_generic.h:605 [inline] prio_tune+0x3b9/0xb50 net/sched/sch_prio.c:211 prio_init+0x5c/0x80 net/sched/sch_prio.c:244 qdisc_create.constprop.0+0x44a/0x10f0 net/sched/sch_api.c:1253 tc_modify_qdisc+0x4c5/0x1980 net/sched/sch_api.c:1660 rtnetlink_rcv_msg+0x413/0xb80 net/core/rtnetlink.c:5594 netlink_rcv_skb+0x153/0x420 net/netlink/af_netlink.c:2494 netlink_unicast_kernel net/netlink/af_netlink.c:1317 [inline] netlink_unicast+0x539/0x7e0 net/netlink/af_netlink.c:1343 netlink_sendmsg+0x904/0xe00 net/netlink/af_netlink.c:1919 sock_sendmsg_nosec net/socket.c:705 [inline] sock_sendmsg+0xcf/0x120 net/socket.c:725 ____sys_sendmsg+0x6e8/0x810 net/socket.c:2413 ___sys_sendmsg+0xf3/0x170 net/socket.c:2467 __sys_sendmsg+0xe5/0x1b0 net/socket.c:2496 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f7ee98aae99 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 
002b:00007ffdbfef12d8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e RAX: ffffffffffffffda RBX: 00007ffdbfef1300 RCX: 00007f7ee98aae99 RDX: 0000000000000000 RSI: 0000000020000000 RDI: 0000000000000003 RBP: 0000000000000000 R08: 000000000000000d R09: 000000000000000d R10: 000000000000000d R11: 0000000000000246 R12: 00007ffdbfef12f0 R13: 00000000000f4240 R14: 000000000004ca47 R15: 00007ffdbfef12e4 INFO: NMI handler (nmi_cpu_backtrace_handler) took too long to run: 2.293 msecs NMI backtrace for cpu 1 CPU: 1 PID: 3260 Comm: kworker/1:3 Not tainted 5.17.0-rc3-syzkaller-00149-gbf8e59fd315f #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: mld mld_ifc_work Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 nmi_cpu_backtrace.cold+0x47/0x144 lib/nmi_backtrace.c:111 nmi_trigger_cpumask_backtrace+0x1b3/0x230 lib/nmi_backtrace.c:62 trigger_single_cpu_backtrace include/linux/nmi.h:164 [inline] rcu_dump_cpu_stacks+0x25e/0x3f0 kernel/rcu/tree_stall.h:343 print_cpu_stall kernel/rcu/tree_stall.h:604 [inline] check_cpu_stall kernel/rcu/tree_stall.h:688 [inline] rcu_pending kernel/rcu/tree.c:3919 [inline] rcu_sched_clock_irq.cold+0x5c/0x759 kernel/rcu/tree.c:2617 update_process_times+0x16d/0x200 kernel/time/timer.c:1785 tick_sched_handle+0x9b/0x180 kernel/time/tick-sched.c:226 tick_sched_timer+0x1b0/0x2d0 kernel/time/tick-sched.c:1428 __run_hrtimer kernel/time/hrtimer.c:1685 [inline] __hrtimer_run_queues+0x1c0/0xe50 kernel/time/hrtimer.c:1749 hrtimer_interrupt+0x31c/0x790 kernel/time/hrtimer.c:1811 local_apic_timer_interrupt arch/x86/kernel/apic/apic.c:1086 [inline] __sysvec_apic_timer_interrupt+0x146/0x530 arch/x86/kernel/apic/apic.c:1103 sysvec_apic_timer_interrupt+0x8e/0xc0 arch/x86/kernel/apic/apic.c:1097 asm_sysvec_apic_timer_interrupt+0x12/0x20 arch/x86/include/asm/idtentry.h:638 RIP: 0010:__sanitizer_cov_trace_const_cmp4+0xc/0x70 kernel/kcov.c:286 Code: 00 00 00 48 89 7c 30 e8 48 89 4c 30 f0 4c 89 54 d8 20 48 89 10 5b c3 0f 1f 80 00 00 00 00 41 89 f8 bf 03 00 00 00 4c 8b 14 24 <89> f1 65 48 8b 34 25 00 70 02 00 e8 14 f9 ff ff 84 c0 74 4b 48 8b RSP: 0018:ffffc90002c5eea8 EFLAGS: 00000246 RAX: 0000000000000007 RBX: ffff88801c625800 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003 RBP: ffff8880137d3100 R08: 0000000000000000 R09: 0000000000000000 R10: ffffffff874fcd88 R11: 0000000000000000 R12: ffff88801d692dc0 R13: ffff8880137d3104 R14: 0000000000000000 R15: ffff88801d692de8 tcf_police_act+0x358/0x11d0 net/sched/act_police.c:256 tcf_action_exec net/sched/act_api.c:1049 [inline] tcf_action_exec+0x1a6/0x530 net/sched/act_api.c:1026 tcf_exts_exec include/net/pkt_cls.h:326 [inline] route4_classify+0xef0/0x1400 net/sched/cls_route.c:179 __tcf_classify net/sched/cls_api.c:1549 [inline] tcf_classify+0x3e8/0x9d0 net/sched/cls_api.c:1615 prio_classify net/sched/sch_prio.c:42 [inline] prio_enqueue+0x3a7/0x790 net/sched/sch_prio.c:75 dev_qdisc_enqueue+0x40/0x300 net/core/dev.c:3668 __dev_xmit_skb net/core/dev.c:3756 [inline] __dev_queue_xmit+0x1f61/0x3660 net/core/dev.c:4081 neigh_hh_output include/net/neighbour.h:533 [inline] neigh_output include/net/neighbour.h:547 [inline] ip_finish_output2+0x14dc/0x2170 net/ipv4/ip_output.c:228 __ip_finish_output net/ipv4/ip_output.c:306 [inline] __ip_finish_output+0x396/0x650 net/ipv4/ip_output.c:288 ip_finish_output+0x32/0x200 net/ipv4/ip_output.c:316 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip_output+0x196/0x310 
net/ipv4/ip_output.c:430 dst_output include/net/dst.h:451 [inline] ip_local_out+0xaf/0x1a0 net/ipv4/ip_output.c:126 iptunnel_xmit+0x628/0xa50 net/ipv4/ip_tunnel_core.c:82 geneve_xmit_skb drivers/net/geneve.c:966 [inline] geneve_xmit+0x10c8/0x3530 drivers/net/geneve.c:1077 __netdev_start_xmit include/linux/netdevice.h:4683 [inline] netdev_start_xmit include/linux/netdevice.h:4697 [inline] xmit_one net/core/dev.c:3473 [inline] dev_hard_start_xmit+0x1eb/0x920 net/core/dev.c:3489 __dev_queue_xmit+0x2985/0x3660 net/core/dev.c:4116 neigh_hh_output include/net/neighbour.h:533 [inline] neigh_output include/net/neighbour.h:547 [inline] ip6_finish_output2+0xf7a/0x14f0 net/ipv6/ip6_output.c:126 __ip6_finish_output net/ipv6/ip6_output.c:191 [inline] __ip6_finish_output+0x61e/0xe90 net/ipv6/ip6_output.c:170 ip6_finish_output+0x32/0x200 net/ipv6/ip6_output.c:201 NF_HOOK_COND include/linux/netfilter.h:296 [inline] ip6_output+0x1e4/0x530 net/ipv6/ip6_output.c:224 dst_output include/net/dst.h:451 [inline] NF_HOOK include/linux/netfilter.h:307 [inline] NF_HOOK include/linux/netfilter.h:301 [inline] mld_sendpack+0x9a3/0xe40 net/ipv6/mcast.c:1826 mld_send_cr net/ipv6/mcast.c:2127 [inline] mld_ifc_work+0x71c/0xdc0 net/ipv6/mcast.c:2659 process_one_work+0x9ac/0x1650 kernel/workqueue.c:2307 worker_thread+0x657/0x1110 kernel/workqueue.c:2454 kthread+0x2e9/0x3a0 kernel/kthread.c:377 ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:295 ---------------- Code disassembly (best guess): 0: 48 89 eb mov %rbp,%rbx 3: c6 45 01 01 movb $0x1,0x1(%rbp) 7: 41 bc 00 80 00 00 mov $0x8000,%r12d d: 48 c1 e9 03 shr $0x3,%rcx 11: 83 e3 07 and $0x7,%ebx 14: 41 be 01 00 00 00 mov $0x1,%r14d 1a: 48 b8 00 00 00 00 00 movabs $0xdffffc0000000000,%rax 21: fc ff df 24: 4c 8d 2c 01 lea (%rcx,%rax,1),%r13 28: eb 0c jmp 0x36 * 2a: f3 90 pause <-- trapping instruction 2c: 41 83 ec 01 sub $0x1,%r12d 30: 0f 84 72 04 00 00 je 0x4a8 36: 41 0f b6 45 00 movzbl 0x0(%r13),%eax 3b: 38 d8 cmp %bl,%al 3d: 7f 08 jg 0x47 3f: 84 .byte 0x84 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Acked-by: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Reported-by: syzbot Link: https://lore.kernel.org/r/20220215235305.3272331-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/sched/act_api.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f2c4bfc79663..26851ca5566a 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -609,15 +609,24 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, restart_act_graph: for (i = 0; i < nr_actions; i++) { const struct tc_action *a = actions[i]; + int repeat_ttl; if (jmp_prgcnt > 0) { jmp_prgcnt -= 1; continue; } + + repeat_ttl = 32; repeat: ret = a->ops->act(skb, a, res); - if (ret == TC_ACT_REPEAT) - goto repeat; /* we need a ttl - JHS */ + + if (unlikely(ret == TC_ACT_REPEAT)) { + if (--repeat_ttl != 0) + goto repeat; + /* suspicious opcode, stop pipeline */ + net_warn_ratelimited("TC_ACT_REPEAT abuse ?\n"); + return TC_ACT_OK; + } if (TC_ACT_EXT_CMP(ret, TC_ACT_JUMP)) { jmp_prgcnt = ret & TCA_ACT_MAX_PRIO_MASK; -- Gitee From 397e43dac979a6fbb6d358a462a4df516e4406f4 Mon Sep 17 00:00:00 2001 From: Jiasheng Jiang Date: Tue, 12 Apr 2022 15:56:13 +0800 Subject: [PATCH 174/340] dmaengine: sh: rcar-dmac: Check for error num after setting mask stable inclusion from linux-4.19.231 commit 
783d70c94e513ca715643c00c6c275d9eb3b1a9e -------------------------------- commit 2d21543efe332cd8c8f212fb7d365bc8b0690bfa upstream. Because of the possible failure of the dma_supported(), the dma_set_mask_and_coherent() may return error num. Therefore, it should be better to check it and return the error if fails. Fixes: dc312349e875 ("dmaengine: rcar-dmac: Widen DMA mask to 40 bits") Signed-off-by: Jiasheng Jiang Reviewed-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20220106030939.2644320-1-jiasheng@iscas.ac.cn Signed-off-by: Vinod Koul Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/dma/sh/rcar-dmac.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/dma/sh/rcar-dmac.c b/drivers/dma/sh/rcar-dmac.c index 80ff95f75199..29c51762336d 100644 --- a/drivers/dma/sh/rcar-dmac.c +++ b/drivers/dma/sh/rcar-dmac.c @@ -1817,7 +1817,9 @@ static int rcar_dmac_probe(struct platform_device *pdev) platform_set_drvdata(pdev, dmac); dmac->dev->dma_parms = &dmac->parms; dma_set_max_seg_size(dmac->dev, RCAR_DMATCR_MASK); - dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + ret = dma_set_mask_and_coherent(dmac->dev, DMA_BIT_MASK(40)); + if (ret) + return ret; ret = rcar_dmac_parse_of(&pdev->dev, dmac); if (ret < 0) -- Gitee From 8dff553c6320897875ff3edb77540ebcbac5aaeb Mon Sep 17 00:00:00 2001 From: JaeSang Yoo Date: Tue, 12 Apr 2022 15:56:14 +0800 Subject: [PATCH 175/340] tracing: Fix tp_printk option related with tp_printk_stop_on_boot stable inclusion from linux-4.19.231 commit 8f8c9e71e192823e4d76fdc53b4391642291bafc -------------------------------- [ Upstream commit 3203ce39ac0b2a57a84382ec184c7d4a0bede175 ] The kernel parameter "tp_printk_stop_on_boot" starts with "tp_printk" which is the same as another kernel parameter "tp_printk". If "tp_printk" setup is called before the "tp_printk_stop_on_boot", it will override the latter and keep it from being set. This is similar to other kernel parameter issues, such as: Commit 745a600cf1a6 ("um: console: Ignore console= option") or init/do_mounts.c:45 (setup function of "ro" kernel param) Fix it by checking for a "_" right after the "tp_printk" and if that exists do not process the parameter. Link: https://lkml.kernel.org/r/20220208195421.969326-1-jsyoo5b@gmail.com Signed-off-by: JaeSang Yoo [ Fixed up change log and added space after if condition ] Signed-off-by: Steven Rostedt (Google) Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- kernel/trace/trace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 119dd5fd5840..feed30698280 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -232,6 +232,10 @@ __setup("trace_clock=", set_trace_boot_clock); static int __init set_tracepoint_printk(char *str) { + /* Ignore the "tp_printk_stop_on_boot" param */ + if (*str == '_') + return 0; + if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0)) tracepoint_printk = 1; return 1; -- Gitee From 6d17be8f94fef455d465b2b17187e0621a457713 Mon Sep 17 00:00:00 2001 From: Zhang Qiao Date: Tue, 12 Apr 2022 15:56:15 +0800 Subject: [PATCH 176/340] cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from linux-4.19.232 commit 4eec5fe1c680a6c47a9bc0cde00960a4eb663342 -------------------------------- commit 05c7b7a92cc87ff8d7fde189d0fade250697573c upstream. 
As previously discussed(https://lkml.org/lkml/2022/1/20/51), cpuset_attach() is affected with similar cpu hotplug race, as follow scenario: cpuset_attach() cpu hotplug --------------------------- ---------------------- down_write(cpuset_rwsem) guarantee_online_cpus() // (load cpus_attach) sched_cpu_deactivate set_cpu_active() // will change cpu_active_mask set_cpus_allowed_ptr(cpus_attach) __set_cpus_allowed_ptr_locked() // (if the intersection of cpus_attach and cpu_active_mask is empty, will return -EINVAL) up_write(cpuset_rwsem) To avoid races such as described above, protect cpuset_attach() call with cpu_hotplug_lock. Fixes: be367d099270 ("cgroups: let ss->can_attach and ss->attach do whole threadgroups at a time") Cc: stable@vger.kernel.org # v2.6.32+ Reported-by: Zhao Gongyi Signed-off-by: Zhang Qiao Acked-by: Waiman Long Reviewed-by: Michal Koutný Signed-off-by: Tejun Heo Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- kernel/cgroup/cpuset.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index feb91177247c..27dd33566df8 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1530,6 +1530,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); + cpus_read_lock(); mutex_lock(&cpuset_mutex); /* prepare for attach */ @@ -1585,6 +1586,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); + cpus_read_unlock(); } /* The various types of files and directories in a cpuset file system */ -- Gitee From 220832c543ad2c75745fc87f046df5b0bc769f8b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Apr 2022 15:56:16 +0800 Subject: [PATCH 177/340] net: __pskb_pull_tail() & pskb_carve_frag_list() drop_monitor friends stable inclusion from linux-4.19.232 commit 1f4ae0f158dafa74133108bfa07b8053eb2a7898 -------------------------------- commit ef527f968ae05c6717c39f49c8709a7e2c19183a upstream. Whenever one of these functions pull all data from an skb in a frag_list, use consume_skb() instead of kfree_skb() to avoid polluting drop monitoring. Fixes: 6fa01ccd8830 ("skbuff: Add pskb_extract() helper function") Signed-off-by: Eric Dumazet Link: https://lore.kernel.org/r/20220220154052.1308469-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/core/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index e1daab49b0eb..c623c129d0ab 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1977,7 +1977,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) /* Free pulled out fragments. */ while ((list = skb_shinfo(skb)->frag_list) != insp) { skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. */ if (clone) { @@ -5482,7 +5482,7 @@ static int pskb_carve_frag_list(struct sk_buff *skb, /* Free pulled out fragments. */ while ((list = shinfo->frag_list) != insp) { shinfo->frag_list = list->next; - kfree_skb(list); + consume_skb(list); } /* And insert new clone at head. 
*/ if (clone) { -- Gitee From df2f0cf3b5380b28391094c43c51777c839405ab Mon Sep 17 00:00:00 2001 From: Tao Liu Date: Tue, 12 Apr 2022 15:56:17 +0800 Subject: [PATCH 178/340] gso: do not skip outer ip header in case of ipip and net_failover stable inclusion from linux-4.19.232 commit e9ffbe63f6f32f526a461756309b61c395168d73 -------------------------------- commit cc20cced0598d9a5ff91ae4ab147b3b5e99ee819 upstream. We encounter a tcp drop issue in our cloud environment. Packet GROed in host forwards to a VM virtio_net nic with net_failover enabled. VM acts as a IPVS LB with ipip encapsulation. The full path like: host gro -> vm virtio_net rx -> net_failover rx -> ipvs fullnat -> ipip encap -> net_failover tx -> virtio_net tx When net_failover transmits a ipip pkt (gso_type = 0x0103, which means SKB_GSO_TCPV4, SKB_GSO_DODGY and SKB_GSO_IPXIP4), there is no gso did because it supports TSO and GSO_IPXIP4. But network_header points to inner ip header. Call Trace: tcp4_gso_segment ------> return NULL inet_gso_segment ------> inner iph, network_header points to ipip_gso_segment inet_gso_segment ------> outer iph skb_mac_gso_segment Afterwards virtio_net transmits the pkt, only inner ip header is modified. And the outer one just keeps unchanged. The pkt will be dropped in remote host. Call Trace: inet_gso_segment ------> inner iph, outer iph is skipped skb_mac_gso_segment __skb_gso_segment validate_xmit_skb validate_xmit_skb_list sch_direct_xmit __qdisc_run __dev_queue_xmit ------> virtio_net dev_hard_start_xmit __dev_queue_xmit ------> net_failover ip_finish_output2 ip_output iptunnel_xmit ip_tunnel_xmit ipip_tunnel_xmit ------> ipip dev_hard_start_xmit __dev_queue_xmit ip_finish_output2 ip_output ip_forward ip_rcv __netif_receive_skb_one_core netif_receive_skb_internal napi_gro_receive receive_buf virtnet_poll net_rx_action The root cause of this issue is specific with the rare combination of SKB_GSO_DODGY and a tunnel device that adds an SKB_GSO_ tunnel option. SKB_GSO_DODGY is set from external virtio_net. We need to reset network header when callbacks.gso_segment() returns NULL. This patch also includes ipv6_gso_segment(), considering SIT, etc. Fixes: cb32f511a70b ("ipip: add GSO/TSO support") Signed-off-by: Tao Liu Reviewed-by: Willem de Bruijn Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- net/ipv4/af_inet.c | 5 ++++- net/ipv6/ip6_offload.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 84ce4f86bc19..86268f3f9bb9 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1338,8 +1338,11 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, } ops = rcu_dereference(inet_offloads[proto]); - if (likely(ops && ops->callbacks.gso_segment)) + if (likely(ops && ops->callbacks.gso_segment)) { segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; + } if (IS_ERR_OR_NULL(segs)) goto out; diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index c7e495f12011..6c47cd0ef240 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -98,6 +98,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); segs = ops->callbacks.gso_segment(skb, features); + if (!segs) + skb->network_header = skb_mac_header(skb) + nhoff - skb->head; } if (IS_ERR_OR_NULL(segs)) -- Gitee From 99c367d73932d7f5e19100a4515d8729e0514b8c Mon Sep 17 00:00:00 2001 From: "daniel.starke@siemens.com" Date: Tue, 12 Apr 2022 15:56:18 +0800 Subject: [PATCH 179/340] tty: n_gsm: fix proper link termination after failed open stable inclusion from linux-4.19.232 commit 337e49675ce55c23a50b92aae889ec5d910d6dc7 -------------------------------- commit e3b7468f082d106459e86e8dc6fb9bdd65553433 upstream. Trying to open a DLCI by sending a SABM frame may fail with a timeout. The link is closed on the initiator side without informing the responder about this event. The responder assumes the link is open after sending a UA frame to answer the SABM frame. The link gets stuck in a half open state. This patch fixes this by initiating the proper link termination procedure after link setup timeout instead of silently closing it down. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220218073123.2121-3-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/tty/n_gsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 7de54c78a7b9..3e9c8a5b6445 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -1486,7 +1486,7 @@ static void gsm_dlci_t1(struct timer_list *t) dlci->mode = DLCI_MODE_ADM; gsm_dlci_open(dlci); } else { - gsm_dlci_close(dlci); + gsm_dlci_begin_close(dlci); /* prevent half open link */ } break; -- Gitee From 7287cbc17e84a4cf9c0488f353041c6880bad61d Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Tue, 12 Apr 2022 15:56:19 +0800 Subject: [PATCH 180/340] memblock: use kfree() to release kmalloced memblock regions stable inclusion from linux-4.19.232 commit a6fcced73d15ab57cd97813999edf6f19d3032f8 -------------------------------- commit c94afc46cae7ad41b2ad6a99368147879f4b0e56 upstream. memblock.{reserved,memory}.regions may be allocated using kmalloc() in memblock_double_array(). Use kfree() to release these kmalloced regions indicated by memblock_{reserved,memory}_in_slab. 
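For illustration only, a minimal user-space sketch of the rule this patch restores -- release storage with the allocator that actually provided it. The names below are invented and stand in for the memblock API:

  #include <stdlib.h>
  #include <stdio.h>

  #define STATIC_SLOTS 4

  struct region { unsigned long base, size; };

  /* "boot-time" storage, standing in for memblock_*_init_regions */
  static struct region init_regions[STATIC_SLOTS];

  struct region_table {
          struct region *regions;
          int in_slab;            /* set once storage came from malloc() */
  };

  static void table_grow(struct region_table *t, int nr)
  {
          /* switch to heap storage, as memblock_double_array() may do */
          t->regions = malloc(nr * sizeof(*t->regions));
          t->in_slab = 1;
  }

  static void table_discard(struct region_table *t)
  {
          /* pick the release path that matches the allocation path */
          if (t->in_slab)
                  free(t->regions);
          /* else: static storage, nothing to free here */
          t->regions = NULL;
          t->in_slab = 0;
  }

  int main(void)
  {
          struct region_table t = { .regions = init_regions, .in_slab = 0 };

          table_discard(&t);      /* static case: free() must not run */
          table_grow(&t, 16);
          table_discard(&t);      /* heap case: free() must run */
          puts("ok");
          return 0;
  }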
Signed-off-by: Miaohe Lin Fixes: 3010f876500f ("mm: discard memblock data later") Signed-off-by: Mike Rapoport Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- mm/memblock.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/mm/memblock.c b/mm/memblock.c index 80c6975ace6f..daa2c42a02d9 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -330,14 +330,20 @@ void __init memblock_discard(void) addr = __pa(memblock.reserved.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.reserved.max); - __memblock_free_late(addr, size); + if (memblock_reserved_in_slab) + kfree(memblock.reserved.regions); + else + __memblock_free_late(addr, size); } if (memblock.memory.regions != memblock_memory_init_regions) { addr = __pa(memblock.memory.regions); size = PAGE_ALIGN(sizeof(struct memblock_region) * memblock.memory.max); - __memblock_free_late(addr, size); + if (memblock_memory_in_slab) + kfree(memblock.memory.regions); + else + __memblock_free_late(addr, size); } } #endif -- Gitee From 8e22786899efe78757423845bbf081c03a4bd314 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 12 Apr 2022 15:56:20 +0800 Subject: [PATCH 181/340] fget: clarify and improve __fget_files() implementation stable inclusion from linux-4.19.232 commit 400c2f361c25bc092d0636cfa32d0549a181e653 -------------------------------- commit e386dfc56f837da66d00a078e5314bc8382fab83 upstream. Commit 054aa8d439b9 ("fget: check that the fd still exists after getting a ref to it") fixed a race with getting a reference to a file just as it was being closed. It was a fairly minimal patch, and I didn't think re-checking the file pointer lookup would be a measurable overhead, since it was all right there and cached. But I was wrong, as pointed out by the kernel test robot. The 'poll2' case of the will-it-scale.per_thread_ops benchmark regressed quite noticeably. Admittedly it seems to be a very artificial test: doing "poll()" system calls on regular files in a very tight loop in multiple threads. That means that basically all the time is spent just looking up file descriptors without ever doing anything useful with them (not that doing 'poll()' on a regular file is useful to begin with). And as a result it shows the extra "re-check fd" cost as a sore thumb. Happily, the regression is fixable by just writing the code to loook up the fd to be better and clearer. There's still a cost to verify the file pointer, but now it's basically in the noise even for that benchmark that does nothing else - and the code is more understandable and has better comments too. 
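The shape of that loop can be sketched outside the kernel with C11 atomics. This is only an analogy (no RCU, invented names), but it shows the lookup / take-ref-unless-zero / revalidate / retry structure the new helper uses:

  #include <stdatomic.h>
  #include <stdio.h>

  struct obj {
          atomic_int refs;        /* 0 means the object is being torn down */
          int payload;
  };

  static _Atomic(struct obj *) slot;      /* stands in for the fd table entry */

  /* Bump the refcount only if it has not already dropped to zero. */
  static int get_ref_unless_zero(struct obj *o)
  {
          int old = atomic_load(&o->refs);

          while (old != 0)
                  if (atomic_compare_exchange_weak(&o->refs, &old, old + 1))
                          return 1;
          return 0;
  }

  static struct obj *lookup(void)
  {
          for (;;) {
                  struct obj *o = atomic_load(&slot);

                  if (!o)
                          return NULL;
                  /* (a) the ref already went to zero: retry the lookup */
                  if (!get_ref_unless_zero(o))
                          continue;
                  /* (b) the slot changed under us: put the ref, retry */
                  if (atomic_load(&slot) != o) {
                          atomic_fetch_sub(&o->refs, 1);
                          continue;
                  }
                  return o;       /* ref held and the entry still matches */
          }
  }

  int main(void)
  {
          struct obj o = { .refs = 1, .payload = 42 };

          atomic_store(&slot, &o);
          struct obj *got = lookup();
          printf("%d\n", got ? got->payload : -1);
          return 0;
  }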
[ Side note: this patch is also a classic case of one that looks very messy with the default greedy Myers diff - it's much more legible with either the patience of histogram diff algorithm ] Link: https://lore.kernel.org/lkml/20211210053743.GA36420@xsang-OptiPlex-9020/ Link: https://lore.kernel.org/lkml/20211213083154.GA20853@linux.intel.com/ Reported-by: kernel test robot Tested-by: Carel Si Cc: Jann Horn Cc: Miklos Szeredi Signed-off-by: Linus Torvalds Signed-off-by: Baokun Li Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- fs/file.c | 73 +++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 57 insertions(+), 16 deletions(-) diff --git a/fs/file.c b/fs/file.c index 20a44d0ac400..eb10b7634d76 100644 --- a/fs/file.c +++ b/fs/file.c @@ -762,28 +762,69 @@ void do_close_on_exec(struct files_struct *files) spin_unlock(&files->file_lock); } -static struct file *__fget(unsigned int fd, fmode_t mask, unsigned int refs) +static inline struct file *__fget_files_rcu(struct files_struct *files, + unsigned int fd, fmode_t mask, unsigned int refs) { - struct files_struct *files = current->files; - struct file *file; + for (;;) { + struct file *file; + struct fdtable *fdt = rcu_dereference_raw(files->fdt); + struct file __rcu **fdentry; - rcu_read_lock(); -loop: - file = fcheck_files(files, fd); - if (file) { - /* File object ref couldn't be taken. - * dup2() atomicity guarantee is the reason - * we loop to catch the new file (or NULL pointer) + if (unlikely(fd >= fdt->max_fds)) + return NULL; + + fdentry = fdt->fd + array_index_nospec(fd, fdt->max_fds); + file = rcu_dereference_raw(*fdentry); + if (unlikely(!file)) + return NULL; + + if (unlikely(file->f_mode & mask)) + return NULL; + + /* + * Ok, we have a file pointer. However, because we do + * this all locklessly under RCU, we may be racing with + * that file being closed. + * + * Such a race can take two forms: + * + * (a) the file ref already went down to zero, + * and get_file_rcu_many() fails. Just try + * again: + */ + if (unlikely(!get_file_rcu_many(file, refs))) + continue; + + /* + * (b) the file table entry has changed under us. + * Note that we don't need to re-check the 'fdt->fd' + * pointer having changed, because it always goes + * hand-in-hand with 'fdt'. + * + * If so, we need to put our refs and try again. */ - if (file->f_mode & mask) - file = NULL; - else if (!get_file_rcu_many(file, refs)) - goto loop; - else if (__fcheck_files(files, fd) != file) { + if (unlikely(rcu_dereference_raw(files->fdt) != fdt) || + unlikely(rcu_dereference_raw(*fdentry) != file)) { fput_many(file, refs); - goto loop; + continue; } + + /* + * Ok, we have a ref to the file, and checked that it + * still exists. + */ + return file; } +} + + +static struct file *__fget(unsigned int fd, fmode_t mask, unsigned int refs) +{ + struct files_struct *files = current->files; + struct file *file; + + rcu_read_lock(); + file = __fget_files_rcu(files, fd, mask, refs); rcu_read_unlock(); return file; -- Gitee From be439c06daf3bff8bc50c6317be8410fc49e400f Mon Sep 17 00:00:00 2001 From: "daniel.starke@siemens.com" Date: Tue, 12 Apr 2022 15:56:21 +0800 Subject: [PATCH 182/340] tty: n_gsm: fix encoding of control signal octet bit DV stable inclusion from linux-4.19.232 commit 28ca082153794cf5c98e7bb93d7f30f8ba46bec4 -------------------------------- commit 737b0ef3be6b319d6c1fd64193d1603311969326 upstream. n_gsm is based on the 3GPP 07.010 and its newer version is the 3GPP 27.010. 
See https://portal.3gpp.org/desktopmodules/Specifications/SpecificationDetails.aspx?specificationId=1516 The changes from 07.010 to 27.010 are non-functional. Therefore, I refer to the newer 27.010 here. Chapter 5.4.6.3.7 describes the encoding of the control signal octet used by the MSC (modem status command). The same encoding is also used in convergence layer type 2 as described in chapter 5.5.2. Table 7 and 24 both require the DV (data valid) bit to be set 1 for outgoing control signal octets sent by the DTE (data terminal equipment), i.e. for the initiator side. Currently, the DV bit is only set if CD (carrier detect) is on, regardless of the side. This patch fixes this behavior by setting the DV bit on the initiator side unconditionally. Fixes: e1eaea46bb40 ("tty: n_gsm line discipline") Cc: stable@vger.kernel.org Signed-off-by: Daniel Starke Link: https://lore.kernel.org/r/20220218073123.2121-1-daniel.starke@siemens.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- drivers/tty/n_gsm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index 3e9c8a5b6445..3f08ad7e629a 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -428,7 +428,7 @@ static u8 gsm_encode_modem(const struct gsm_dlci *dlci) modembits |= MDM_RTR; if (dlci->modem_tx & TIOCM_RI) modembits |= MDM_IC; - if (dlci->modem_tx & TIOCM_CD) + if (dlci->modem_tx & TIOCM_CD || dlci->gsm->initiator) modembits |= MDM_DV; return modembits; } -- Gitee From dc8da50c3488f9d6ae233b3afb0334c1aa09c7a5 Mon Sep 17 00:00:00 2001 From: Igor Pylypiv Date: Tue, 12 Apr 2022 15:56:22 +0800 Subject: [PATCH 183/340] Revert "module, async: async_synchronize_full() on module init iff async is used" stable inclusion from linux-4.19.231 commit a0c66ac8b72f816d5631fde0ca0b39af602dce48 -------------------------------- [ Upstream commit 67d6212afda218d564890d1674bab28e8612170f ] This reverts commit 774a1221e862b343388347bac9b318767336b20b. We need to finish all async code before the module init sequence is done. In the reverted commit the PF_USED_ASYNC flag was added to mark a thread that called async_schedule(). Then the PF_USED_ASYNC flag was used to determine whether or not async_synchronize_full() needs to be invoked. This works when modprobe thread is calling async_schedule(), but it does not work if module dispatches init code to a worker thread which then calls async_schedule(). For example, PCI driver probing is invoked from a worker thread based on a node where device is attached: if (cpu < nr_cpu_ids) error = work_on_cpu(cpu, local_pci_probe, &ddi); else error = local_pci_probe(&ddi); We end up in a situation where a worker thread gets the PF_USED_ASYNC flag set instead of the modprobe thread. As a result, async_synchronize_full() is not invoked and modprobe completes without waiting for the async code to finish. The issue was discovered while loading the pm80xx driver: (scsi_mod.scan=async) modprobe pm80xx worker ... do_init_module() ... pci_call_probe() work_on_cpu(local_pci_probe) local_pci_probe() pm8001_pci_probe() scsi_scan_host() async_schedule() worker->flags |= PF_USED_ASYNC; ... < return from worker > ... if (current->flags & PF_USED_ASYNC) <--- false async_synchronize_full(); Commit 21c3c5d28007 ("block: don't request module during elevator init") fixed the deadlock issue which the reverted commit 774a1221e862 ("module, async: async_synchronize_full() on module init iff async is used") tried to fix. 
Since commit 0fdff3ec6d87 ("async, kmod: warn on synchronous request_module() from async workers") synchronous module loading from async is not allowed. Given that the original deadlock issue is fixed and it is no longer allowed to call synchronous request_module() from async we can remove PF_USED_ASYNC flag to make module init consistently invoke async_synchronize_full() unless async module probe is requested. Signed-off-by: Igor Pylypiv Reviewed-by: Changyuan Lyu Reviewed-by: Luis Chamberlain Acked-by: Tejun Heo Signed-off-by: Linus Torvalds Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Signed-off-by: Laibin Qiu --- include/linux/sched.h | 1 - kernel/async.c | 3 --- kernel/module.c | 25 +++++-------------------- 3 files changed, 5 insertions(+), 24 deletions(-) diff --git a/include/linux/sched.h b/include/linux/sched.h index 0202fe007318..ec7418ff12d4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1432,7 +1432,6 @@ extern struct pid *cad_pid; #define PF_MEMALLOC 0x00000800 /* Allocating memory */ #define PF_NPROC_EXCEEDED 0x00001000 /* set_user() noticed that RLIMIT_NPROC was exceeded */ #define PF_USED_MATH 0x00002000 /* If unset the fpu must be initialized before use */ -#define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ #define PF_KSWAPD 0x00020000 /* I am kswapd */ diff --git a/kernel/async.c b/kernel/async.c index a893d6170944..4bf1b00a28d8 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -191,9 +191,6 @@ static async_cookie_t __async_schedule(async_func_t func, void *data, struct asy atomic_inc(&entry_count); spin_unlock_irqrestore(&async_lock, flags); - /* mark that this task has queued an async job, used by module init */ - current->flags |= PF_USED_ASYNC; - /* schedule for execution */ queue_work(system_unbound_wq, &entry->work); diff --git a/kernel/module.c b/kernel/module.c index 38c583d54def..591c176ac6a2 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3493,12 +3493,6 @@ static noinline int do_init_module(struct module *mod) } freeinit->module_init = mod->init_layout.base; - /* - * We want to find out whether @mod uses async during init. Clear - * PF_USED_ASYNC. async_schedule*() will set it. - */ - current->flags &= ~PF_USED_ASYNC; - do_mod_ctors(mod); /* Start the module */ if (mod->init != NULL) @@ -3524,22 +3518,13 @@ static noinline int do_init_module(struct module *mod) /* * We need to finish all async code before the module init sequence - * is done. This has potential to deadlock. For example, a newly - * detected block device can trigger request_module() of the - * default iosched from async probing task. Once userland helper - * reaches here, async_synchronize_full() will wait on the async - * task waiting on request_module() and deadlock. - * - * This deadlock is avoided by perfomring async_synchronize_full() - * iff module init queued any async jobs. This isn't a full - * solution as it will deadlock the same if module loading from - * async jobs nests more than once; however, due to the various - * constraints, this hack seems to be the best option for now. - * Please refer to the following thread for details. + * is done. This has potential to deadlock if synchronous module + * loading is requested from async (which is not allowed!). 
* - * http://thread.gmane.org/gmane.linux.kernel/1420814 + * See commit 0fdff3ec6d87 ("async, kmod: warn on synchronous + * request_module() from async workers") for more details. */ - if (!mod->async_probe_requested && (current->flags & PF_USED_ASYNC)) + if (!mod->async_probe_requested) async_synchronize_full(); ftrace_free_mem(mod, mod->init_layout.base, mod->init_layout.base + -- Gitee From 2ea2de935b1cbae65e60c53e9dd82c130f06e447 Mon Sep 17 00:00:00 2001 From: Chengguang Xu Date: Thu, 14 Apr 2022 09:33:35 +0000 Subject: [PATCH 184/340] ovl: sync dirty data when remounting to ro mode mainline inclusion from mainline-v5.8-rc1 commit 399c109d357a7e217cf7ef551e7e234439c68c15 category: bugfix bugzilla: 186580, https://gitee.com/openeuler/kernel/issues/I52LVN CVE: NA -------------------------------- sync_filesystem() does not sync dirty data for readonly filesystem during umount, so before changing to readonly filesystem we should sync dirty data for data integrity. Signed-off-by: Chengguang Xu Signed-off-by: Miklos Szeredi Signed-off-by: Zheng Liang Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/overlayfs/super.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 6729999f4a66..45fe0c71acf6 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -377,11 +377,20 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry) static int ovl_remount(struct super_block *sb, int *flags, char *data) { struct ovl_fs *ofs = sb->s_fs_info; + struct super_block *upper_sb; + int ret = 0; if (!(*flags & SB_RDONLY) && ovl_force_readonly(ofs)) return -EROFS; - return 0; + if (*flags & SB_RDONLY && !sb_rdonly(sb)) { + upper_sb = ofs->upper_mnt->mnt_sb; + down_read(&upper_sb->s_umount); + ret = sync_filesystem(upper_sb); + up_read(&upper_sb->s_umount); + } + + return ret; } static const struct super_operations ovl_super_operations = { -- Gitee From b52ef3c66e2e1bcea44269e8738a8c9deedf6851 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 14 Apr 2022 09:33:36 +0000 Subject: [PATCH 185/340] ovl: fix lseek overflow on 32bit mainline inclusion from mainline-v5.6-rc1 commit a4ac9d45c0cd14a2adc872186431c79804b77dbf category: bugfix bugzilla: 95402, https://gitee.com/openeuler/kernel/issues/I52LW9 CVE: NA -------------------------------- ovl_lseek() is using ssize_t to return the value from vfs_llseek(). On a 32-bit kernel ssize_t is a 32-bit signed int, which overflows above 2 GB. Assign the return value of vfs_llseek() to loff_t to fix this. 
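A small illustration of the truncation, with int32_t playing the role of the 32-bit ssize_t (values above 2 GiB typically wrap negative on common ABIs):

  #include <stdio.h>
  #include <stdint.h>

  int main(void)
  {
          int64_t offset = 3LL * 1024 * 1024 * 1024;    /* 3 GiB seek result */
          int32_t truncated = (int32_t)offset;          /* what a 32-bit ssize_t keeps */
          int64_t kept = offset;                        /* what loff_t keeps */

          printf("truncated: %d\n", truncated);         /* wraps negative */
          printf("kept     : %lld\n", (long long)kept); /* 3221225472 */
          return 0;
  }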
Reported-by: Boris Gjenero Fixes: 9e46b840c705 ("ovl: support stacked SEEK_HOLE/SEEK_DATA") Cc: # v4.19 Signed-off-by: Miklos Szeredi Signed-off-by: Zheng Liang Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/overlayfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index cd1c94f77dee..470ea215bebc 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -159,7 +159,7 @@ static loff_t ovl_llseek(struct file *file, loff_t offset, int whence) struct inode *inode = file_inode(file); struct fd real; const struct cred *old_cred; - ssize_t ret; + loff_t ret; /* * The two special cases below do not need to involve real fs, -- Gitee From b453e4a7c462a1182a2427029475e39ecade22d3 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 14 Apr 2022 09:33:37 +0000 Subject: [PATCH 186/340] ovl: fix IOCB_DIRECT if underlying fs doesn't support direct IO mainline inclusion from mainline-v5.15-rc5 commit 1dc1eed46f9fa4cb8a07baa24fb44c96d6dd35c9 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I51NT0 CVE: NA -------------------------------- Normally the check at open time suffices, but e.g loop device does set IOCB_DIRECT after doing its own checks (which are not sufficent for overlayfs). Make sure we don't call the underlying filesystem read/write method with the IOCB_DIRECT if it's not supported. Reported-by: Huang Jianan Fixes: 16914e6fc7e1 ("ovl: add ovl_read_iter()") Cc: # v4.19 Tested-by: Huang Jianan Signed-off-by: Miklos Szeredi conflicts: fs/overlayfs/file.c Signed-off-by: Zheng Liang Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/overlayfs/file.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 470ea215bebc..63b7b2600d81 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -252,13 +252,19 @@ static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter) if (ret) return ret; + ret = -EINVAL; + if (iocb->ki_flags & IOCB_DIRECT && + (!real.file->f_mapping->a_ops || + !real.file->f_mapping->a_ops->direct_IO)) + goto out_fdput; + old_cred = ovl_override_creds(file_inode(file)->i_sb); ret = vfs_iter_read(real.file, iter, &iocb->ki_pos, ovl_iocb_to_rwf(iocb)); revert_creds(old_cred); ovl_file_accessed(file); - +out_fdput: fdput(real); return ret; @@ -286,6 +292,12 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) if (ret) goto out_unlock; + ret = -EINVAL; + if (iocb->ki_flags & IOCB_DIRECT && + (!real.file->f_mapping->a_ops || + !real.file->f_mapping->a_ops->direct_IO)) + goto out_fdput; + old_cred = ovl_override_creds(file_inode(file)->i_sb); file_start_write(real.file); ret = vfs_iter_write(real.file, iter, &iocb->ki_pos, @@ -295,7 +307,7 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter) /* Update size */ ovl_copyattr(ovl_inode_real(inode), inode); - +out_fdput: fdput(real); out_unlock: -- Gitee From c1662fc5abe162b5d73e11cc524af8fa26e42680 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 14 Apr 2022 09:33:38 +0000 Subject: [PATCH 187/340] ovl: fix uninitialized pointer read in ovl_lookup_real_one() mainline inclusion from mainline-v5.14-rc6 commit 580c610429b3994e8db24418927747cf28443cde category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I51PNB CVE: NA -------------------------------- One error path can result in release_dentry_name_snapshot() being called before "name" was initialized by 
take_dentry_name_snapshot(). Fix by moving the release_dentry_name_snapshot() to immediately after the only use. Reported-by: Colin Ian King Signed-off-by: Miklos Szeredi conflicts: fs/overlayfs/export.c Signed-off-by: Zheng Liang Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/overlayfs/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 4028e04a7b0b..bf6c97ce6d8b 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -398,6 +398,7 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, */ take_dentry_name_snapshot(&name, real); this = lookup_one_len(name.name, connected, strlen(name.name)); + release_dentry_name_snapshot(&name); err = PTR_ERR(this); if (IS_ERR(this)) { goto fail; @@ -412,7 +413,6 @@ static struct dentry *ovl_lookup_real_one(struct dentry *connected, } out: - release_dentry_name_snapshot(&name); dput(parent); inode_unlock(dir); return this; -- Gitee From 1b1798702f1dabcf5dc978cfc86f43906f2879f6 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 14 Apr 2022 09:33:39 +0000 Subject: [PATCH 188/340] hamradio: defer 6pack kfree after unregister_netdev mainline inclusion from mainline-v5.16-rc1 commit 0b9111922b1f399aba6ed1e1b8f2079c3da1aed8 category: bugfix bugzilla: 186546, https://gitee.com/openeuler/kernel/issues/I51J84 CVE: CVE-2022-1198 -------------------------------- There is a possible race condition (use-after-free) like below (USE) | (FREE) dev_queue_xmit | __dev_queue_xmit | __dev_xmit_skb | sch_direct_xmit | ... xmit_one | netdev_start_xmit | tty_ldisc_kill __netdev_start_xmit | 6pack_close sp_xmit | kfree sp_encaps | | According to the patch "defer ax25 kfree after unregister_netdev", this patch reorder the kfree after the unregister_netdev to avoid the possible UAF as the unregister_netdev() is well synchronized and won't return if there is a running routine. Signed-off-by: Lin Ma Signed-off-by: David S. Miller Signed-off-by: Xu Jia Reviewed-by: Wei Yongjun Reviewed-by: Wang Weiyang Signed-off-by: Yongqiang Liu --- drivers/net/hamradio/6pack.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 1001e9a2edd4..91180761b8cb 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -677,11 +677,13 @@ static void sixpack_close(struct tty_struct *tty) del_timer_sync(&sp->tx_t); del_timer_sync(&sp->resync_t); - /* Free all 6pack frame buffers. */ + unregister_netdev(sp->dev); + + /* Free all 6pack frame buffers after unreg. */ kfree(sp->rbuff); kfree(sp->xbuff); - unregister_netdev(sp->dev); + free_netdev(sp->dev); } /* Perform I/O control on an active 6pack channel. */ -- Gitee From 2170508665c2533a62505f129b91e0c90397884a Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Thu, 14 Apr 2022 09:33:40 +0000 Subject: [PATCH 189/340] hamradio: remove needs_free_netdev to avoid UAF mainline inclusion from mainline-v5.16-rc2 commit 81b1d548d00bcd028303c4f3150fa753b9b8aa71 category: bugfix bugzilla: 186546, https://gitee.com/openeuler/kernel/issues/I51J84 CVE: CVE-2022-1198 -------------------------------- The former patch "defer 6pack kfree after unregister_netdev" reorders the kfree of two buffer after the unregister_netdev to prevent the race condition. It also adds free_netdev() function in sixpack_close(), which is a direct copy from the similar code in mkiss_close(). 
However, in sixpack driver, the flag needs_free_netdev is set to true in sp_setup(), hence the unregister_netdev() will free the netdev automatically. Therefore, as the sp is netdev_priv, use-after-free occurs. This patch removes the needs_free_netdev = true and just let the free_netdev to finish this deallocation task. Fixes: 0b9111922b1f ("hamradio: defer 6pack kfree after unregister_netdev") Signed-off-by: Lin Ma Link: https://lore.kernel.org/r/20211111141402.7551-1-linma@zju.edu.cn Signed-off-by: Jakub Kicinski Signed-off-by: Xu Jia Reviewed-by: Wei Yongjun Reviewed-by: Wang Weiyang Signed-off-by: Yongqiang Liu --- drivers/net/hamradio/6pack.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 91180761b8cb..592c1d92a8e0 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -311,7 +311,6 @@ static void sp_setup(struct net_device *dev) { /* Finish setting up the DEVICE info. */ dev->netdev_ops = &sp_netdev_ops; - dev->needs_free_netdev = true; dev->mtu = SIXP_MTU; dev->hard_header_len = AX25_MAX_HEADER_LEN; dev->header_ops = &ax25_header_ops; -- Gitee From 12f56ab1a40bd5caa1d83ecaf9d32593f51fbd82 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Thu, 14 Apr 2022 09:33:41 +0000 Subject: [PATCH 190/340] drivers: hamradio: 6pack: fix UAF bug caused by mod_timer() mainline inclusion from mainline-v5.18-rc1 commit efe4186e6a1b54bf38b9e05450d43b0da1fd7739 category: bugfix bugzilla: 186546, https://gitee.com/openeuler/kernel/issues/I51J84 CVE: CVE-2022-1198 -------------------------------- When a 6pack device is detaching, the sixpack_close() will act to cleanup necessary resources. Although del_timer_sync() in sixpack_close() won't return if there is an active timer, one could use mod_timer() in sp_xmit_on_air() to wake up timer again by calling userspace syscall such as ax25_sendmsg(), ax25_connect() and ax25_ioctl(). This unexpected waked handler, sp_xmit_on_air(), realizes nothing about the undergoing cleanup and may still call pty_write() to use driver layer resources that have already been released. One of the possible race conditions is shown below: (USE) | (FREE) ax25_sendmsg() | ax25_queue_xmit() | ... | sp_xmit() | sp_encaps() | sixpack_close() sp_xmit_on_air() | del_timer_sync(&sp->tx_t) mod_timer(&sp->tx_t,...) | ... | unregister_netdev() | ... (wait a while) | tty_release() | tty_release_struct() | release_tty() sp_xmit_on_air() | tty_kref_put(tty_struct) //FREE pty_write(tty_struct) //USE | ... The corresponding fail log is shown below: Reviewed-by: Wei Yongjun Reviewed-by: Wang Weiyang =============================================================== BUG: KASAN: use-after-free in __run_timers.part.0+0x170/0x470 Write of size 8 at addr ffff88800a652ab8 by task swapper/2/0 ... Call Trace: ... queue_work_on+0x3f/0x50 pty_write+0xcd/0xe0pty_write+0xcd/0xe0 sp_xmit_on_air+0xb2/0x1f0 call_timer_fn+0x28/0x150 __run_timers.part.0+0x3c2/0x470 run_timer_softirq+0x3b/0x80 __do_softirq+0xf1/0x380 ... This patch reorders the del_timer_sync() after the unregister_netdev() to avoid UAF bugs. Because the unregister_netdev() is well synchronized, it flushs out any pending queues, waits the refcount of net_device decreases to zero and removes net_device from kernel. There is not any running routines after executing unregister_netdev(). Therefore, we could not arouse timer from userspace again. Signed-off-by: Duoming Zhou Reviewed-by: Lin Ma Signed-off-by: David S. 
Miller Signed-off-by: Xu Jia Signed-off-by: Yongqiang Liu --- drivers/net/hamradio/6pack.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 592c1d92a8e0..6f7e1598106f 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -673,11 +673,11 @@ static void sixpack_close(struct tty_struct *tty) */ netif_stop_queue(sp->dev); + unregister_netdev(sp->dev); + del_timer_sync(&sp->tx_t); del_timer_sync(&sp->resync_t); - unregister_netdev(sp->dev); - /* Free all 6pack frame buffers after unreg. */ kfree(sp->rbuff); kfree(sp->xbuff); -- Gitee From 3ef161fb8febd46bf96b669b98e2efc590873105 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 14 Apr 2022 09:33:42 +0000 Subject: [PATCH 191/340] mm: page_counter: mitigate consequences of a page_counter underflow mainline inclusion from mainline-v5.13-rc1 commit 9317d0fffeb4c3929069cfc7377cfa2a7cd36d1d category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5241W CVE: NA -------------------------------- When the unsigned page_counter underflows, even just by a few pages, a cgroup will not be able to run anything afterwards and trigger the OOM killer in a loop. Underflows shouldn't happen, but when they do in practice, we may just be off by a small amount that doesn't interfere with the normal operation - consequences don't need to be that dire. Reset the page_counter to 0 upon underflow. We'll issue a warning that the accounting will be off and then try to keep limping along. [ We used to do this with the original res_counter, where it was a more straight-forward correction inside the spinlock section. I didn't carry it forward into the lockless page counters for simplicity, but it turns out this is quite useful in practice. ] Link: https://lkml.kernel.org/r/20210408143155.2679744-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Chris Down Reviewed-by: Shakeel Butt Cc: Hugh Dickins Cc: Roman Gushchin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/page_counter.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mm/page_counter.c b/mm/page_counter.c index 147ff99187b8..58903e3bfdd3 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -57,9 +57,13 @@ void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages) long new; new = atomic_long_sub_return(nr_pages, &counter->usage); - propagate_protected_usage(counter, new); /* More uncharges than charges? */ - WARN_ON_ONCE(new < 0); + if (WARN_ONCE(new < 0, "page_counter underflow: %ld nr_pages=%lu\n", + new, nr_pages)) { + new = 0; + atomic_long_set(&counter->usage, new); + } + propagate_protected_usage(counter, new); } /** -- Gitee From 456a929c284b5221a1106074e545f88aee8aeca4 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Thu, 14 Apr 2022 09:33:43 +0000 Subject: [PATCH 192/340] mm: Drop reliable_reserve_size hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- Commit 368d710d7f32 ("mm: Fallback to non-mirrored region below low watermark") already set the default value of reliable_reserve_size to zero which will disable reliable watermark check by default. With this patch, code related to this mechanism is removed since no one use this watermark check. 
Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- include/linux/mem_reliable.h | 17 ----------------- mm/mem_reliable.c | 33 --------------------------------- mm/page_alloc.c | 4 ---- 3 files changed, 54 deletions(-) diff --git a/include/linux/mem_reliable.h b/include/linux/mem_reliable.h index 8340a24fe76d..6d57c36fb676 100644 --- a/include/linux/mem_reliable.h +++ b/include/linux/mem_reliable.h @@ -24,7 +24,6 @@ DECLARE_PER_CPU(long, nr_reliable_buddy_pages); extern struct percpu_counter pagecache_reliable_pages; extern struct percpu_counter anon_reliable_pages; -extern unsigned long nr_reliable_reserve_pages __read_mostly; extern long shmem_reliable_nr_page __read_mostly; extern void add_reliable_mem_size(long sz); @@ -118,21 +117,6 @@ static inline void mem_reliable_buddy_counter(struct page *page, int nr_page) this_cpu_add(nr_reliable_buddy_pages, nr_page); } -/* reserve mirrored memory for kernel usage */ -static inline bool mem_reliable_watermark_ok(int nr_page) -{ - long sum = 0; - int cpu; - - if (!reliable_allow_fb_enabled()) - return true; - - for_each_possible_cpu(cpu) - sum += per_cpu(nr_reliable_buddy_pages, cpu); - - return sum > nr_reliable_reserve_pages; -} - static inline bool mem_reliable_shmem_limit_check(void) { return percpu_counter_read_positive(&reliable_shmem_used_nr_page) < @@ -178,7 +162,6 @@ static inline void shmem_reliable_page_counter(struct page *page, int nr_page) static inline bool pagecache_reliable_is_enabled(void) { return false; } static inline bool mem_reliable_status(void) { return false; } static inline void mem_reliable_buddy_counter(struct page *page, int nr_page) {} -static inline bool mem_reliable_watermark_ok(int nr_page) { return true; } static inline bool mem_reliable_shmem_limit_check(void) { return true; } static inline void reliable_lru_add(enum lru_list lru, struct page *page, diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 6d4ab4bee3d5..83bfdf265273 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -11,8 +11,6 @@ #include #include -#define MEM_RELIABLE_RESERVE_MIN 0 - enum mem_reliable_types { MEM_RELIABLE_ALL, MEM_RELIABLE_FALLBACK, @@ -31,7 +29,6 @@ bool reliable_allow_fallback __read_mostly = true; bool shmem_reliable __read_mostly = true; struct percpu_counter reliable_shmem_used_nr_page __read_mostly; DEFINE_PER_CPU(long, nr_reliable_buddy_pages); -unsigned long nr_reliable_reserve_pages = MEM_RELIABLE_RESERVE_MIN / PAGE_SIZE; long shmem_reliable_nr_page = LONG_MAX; bool pagecache_use_reliable_mem __read_mostly = true; @@ -338,29 +335,6 @@ int reliable_debug_handler(struct ctl_table *table, int write, return ret; } -static unsigned long sysctl_reliable_reserve_size = MEM_RELIABLE_RESERVE_MIN; - -int reliable_reserve_size_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) -{ - unsigned long *data_ptr = (unsigned long *)(table->data); - unsigned long old = *data_ptr; - int ret; - - ret = proc_doulongvec_minmax(table, write, buffer, length, ppos); - if (ret == 0 && write) { - if (*data_ptr > total_reliable_mem_sz() || - *data_ptr < MEM_RELIABLE_RESERVE_MIN) { - *data_ptr = old; - return -EINVAL; - } - - nr_reliable_reserve_pages = *data_ptr / PAGE_SIZE; - } - - return ret; -} - #ifdef CONFIG_SHMEM static unsigned long sysctl_shmem_reliable_bytes_limit = ULONG_MAX; @@ -417,13 +391,6 @@ static struct ctl_table reliable_ctl_table[] = { .mode = 0600, .proc_handler = reliable_debug_handler, }, - { - .procname = "reliable_reserve_size", - .data = 
&sysctl_reliable_reserve_size, - .maxlen = sizeof(sysctl_reliable_reserve_size), - .mode = 0644, - .proc_handler = reliable_reserve_size_handler, - }, #ifdef CONFIG_SHMEM { .procname = "shmem_reliable_bytes_limit", diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9d4f75235420..38f2a84d3224 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4649,10 +4649,6 @@ static inline bool check_after_alloc(gfp_t *gfp_mask, unsigned int order, if (*gfp_mask & __GFP_NOFAIL) goto out; - /* check water mark, reserver mirrored mem for kernel */ - if (!mem_reliable_watermark_ok(1 << order)) - goto out_free_page; - /* percpu counter is not initialized, ignore limit check */ if (!mem_reliable_counter_initialized()) goto out; -- Gitee From 9f07eb30e62486a47d70adc4568af3265febe568 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Thu, 14 Apr 2022 09:33:44 +0000 Subject: [PATCH 193/340] mm: Add space after ReliableFileCache hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- Add a space after ReliableFileCache so that the code is easy to read and maintain. Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/mem_reliable.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 83bfdf265273..646c27993df4 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -230,7 +230,8 @@ void reliable_report_meminfo(struct seq_file *m) num += global_node_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE); num += global_node_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE); show_val_kb(m, "FileCache: ", num); - show_val_kb(m, "ReliableFileCache:", nr_pagecache_pages); + seq_printf(m, "ReliableFileCache: %8llu kB\n", + nr_pagecache_pages); } } -- Gitee From 92a256aca17c823413f3fdb529ac57359d3f780b Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Thu, 14 Apr 2022 09:33:45 +0000 Subject: [PATCH 194/340] can: ems_usb: ems_usb_start_xmit(): fix double dev_kfree_skb() in error path mainline inclusion from mainline-v5.18-rc1 commit c70222752228a62135cee3409dccefd494a24646 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I51YBP CVE: CVE-2022-28390 -------------------------------- There is no need to call dev_kfree_skb() when usb_submit_urb() fails beacause can_put_echo_skb() deletes the original skb and can_free_echo_skb() deletes the cloned skb. 
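A toy model of the ownership rule behind the fix -- once a buffer has been handed to a helper that disposes of it on failure, the caller's error path must not free it again. The names are invented; this is not the CAN driver API:

  #include <stdlib.h>
  #include <stdio.h>

  struct buf { char data[64]; };

  /* Helper that takes ownership: it frees the buffer itself when it fails. */
  static int submit(struct buf *b, int force_error)
  {
          if (force_error) {
                  free(b);        /* helper consumes the buffer on failure */
                  return -1;
          }
          /* ... would queue b for transmission and free it on completion ... */
          free(b);
          return 0;
  }

  int main(void)
  {
          struct buf *b = malloc(sizeof(*b));

          if (submit(b, 1) < 0) {
                  /* The removed bug pattern: calling free(b) here as well
                   * would be a double free, since submit() already did. */
                  fprintf(stderr, "submit failed\n");
          }
          return 0;
  }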
Link: https://lore.kernel.org/all/20220228083639.38183-1-hbh25y@gmail.com Fixes: 702171adeed3 ("ems_usb: Added support for EMS CPC-USB/ARM7 CAN/USB interface") Cc: stable@vger.kernel.org Cc: Sebastian Haas Signed-off-by: Hangyu Hua Signed-off-by: Marc Kleine-Budde Signed-off-by: Baisong Zhong Reviewed-by: Xiu Jianfeng Reviewed-by: Wei Yongjun Signed-off-by: Yongqiang Liu --- drivers/net/can/usb/ems_usb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index b7dfd4109d24..7a92f640c379 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -823,7 +823,6 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne usb_unanchor_urb(urb); usb_free_coherent(dev->udev, size, buf, urb->transfer_dma); - dev_kfree_skb(skb); atomic_dec(&dev->active_tx_urbs); -- Gitee From c82b8d71f1d43c7a387c8de3a1432bd6a88c24b1 Mon Sep 17 00:00:00 2001 From: Lijun Fang Date: Thu, 14 Apr 2022 09:33:46 +0000 Subject: [PATCH 195/340] svm: Export symbols for svm module ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4UR5Y CVE: NA -------------------- Export symbols for svm module. Signed-off-by: Lijun Fang Reviewed-by: Weilong Chen Signed-off-by: Yongqiang Liu --- arch/arm64/mm/context.c | 2 ++ drivers/iommu/iommu.c | 1 + mm/share_pool.c | 1 + 3 files changed, 4 insertions(+) diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 35a103c7c22b..e7cc878a0b05 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -310,6 +310,7 @@ unsigned long mm_context_get(struct mm_struct *mm) return asid; } +EXPORT_SYMBOL_GPL(mm_context_get); void mm_context_put(struct mm_struct *mm) { @@ -325,6 +326,7 @@ void mm_context_put(struct mm_struct *mm) raw_spin_unlock_irqrestore(&cpu_asid_lock, flags); } +EXPORT_SYMBOL_GPL(mm_context_put); /* Errata workaround post TTBRx_EL1 update. */ asmlinkage void post_ttbr_update_workaround(void) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 63bd5940b290..84a59762b037 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -2287,6 +2287,7 @@ int iommu_request_dm_for_dev(struct device *dev) return ret; } +EXPORT_SYMBOL_GPL(iommu_request_dm_for_dev); const struct iommu_ops *iommu_ops_from_fwnode(struct fwnode_handle *fwnode) { diff --git a/mm/share_pool.c b/mm/share_pool.c index 90733b807f12..40147ef85ba5 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -91,6 +91,7 @@ static int __read_mostly enable_share_k2u_spg = 1; int sysctl_ac_mode = AC_NONE; /* debug mode */ int sysctl_sp_debug_mode; +EXPORT_SYMBOL(sysctl_sp_debug_mode); int sysctl_share_pool_map_lock_enable; -- Gitee From fd64ea2e7f74c11a088b3ce6701e704b8ff728a5 Mon Sep 17 00:00:00 2001 From: Lijun Fang Date: Thu, 14 Apr 2022 09:33:47 +0000 Subject: [PATCH 196/340] svm: Delete get meminfo interface in svm ioctl ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4UR5Y CVE: NA -------------------- The functions(svm_get_hugeinfo, svm_get_phy_memory_info) can be replaced by reading /proc/meminfo, we will never use these functions. 
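For example, the hugepage counters those ioctls exposed are available to userspace roughly like this (illustration only, minimal error handling):

  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          char line[256];
          FILE *f = fopen("/proc/meminfo", "r");

          if (!f)
                  return 1;
          while (fgets(line, sizeof(line), f)) {
                  /* print the fields the deleted ioctls used to report */
                  if (!strncmp(line, "HugePages_Total:", 16) ||
                      !strncmp(line, "HugePages_Free:", 15) ||
                      !strncmp(line, "MemFree:", 8))
                          fputs(line, stdout);
          }
          fclose(f);
          return 0;
  }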
Signed-off-by: Lijun Fang Reviewed-by: Weilong Chen Signed-off-by: Yongqiang Liu --- drivers/char/svm.c | 118 --------------------------------------------- 1 file changed, 118 deletions(-) diff --git a/drivers/char/svm.c b/drivers/char/svm.c index 098b84529d0e..c48cbce39661 100644 --- a/drivers/char/svm.c +++ b/drivers/char/svm.c @@ -43,8 +43,6 @@ #define SVM_IOCTL_LOAD_FLAG 0xfffa #define SVM_IOCTL_PIN_MEMORY 0xfff7 #define SVM_IOCTL_UNPIN_MEMORY 0xfff5 -#define SVM_IOCTL_GETHUGEINFO 0xfff6 -#define SVM_IOCTL_GET_PHYMEMINFO 0xfff8 #define SVM_IOCTL_REMAP_PROC 0xfff4 #define SVM_REMAP_MEM_LEN_MAX (16 * 1024 * 1024) @@ -123,23 +121,6 @@ struct svm_proc_mem { u64 buf; }; -struct meminfo { - unsigned long hugetlbfree; - unsigned long hugetlbtotal; -}; - -struct phymeminfo { - unsigned long normal_total; - unsigned long normal_free; - unsigned long huge_total; - unsigned long huge_free; -}; - -struct phymeminfo_ioctl { - struct phymeminfo *info; - unsigned long nodemask; -}; - struct spalloc { unsigned long addr; unsigned long size; @@ -169,10 +150,6 @@ static char *svm_cmd_to_string(unsigned int cmd) return "pin memory"; case SVM_IOCTL_UNPIN_MEMORY: return "unpin memory"; - case SVM_IOCTL_GETHUGEINFO: - return "get hugeinfo"; - case SVM_IOCTL_GET_PHYMEMINFO: - return "get physical memory info"; case SVM_IOCTL_REMAP_PROC: return "remap proc"; case SVM_IOCTL_LOAD_FLAG: @@ -1562,95 +1539,6 @@ static int svm_set_rc(unsigned long __user *arg) return 0; } -static long svm_get_hugeinfo(unsigned long __user *arg) -{ - struct hstate *h = &default_hstate; - struct meminfo info; - - if (!acpi_disabled) - return -EPERM; - - if (arg == NULL) - return -EINVAL; - - if (!hugepages_supported()) - return -ENOTSUPP; - - info.hugetlbfree = h->free_huge_pages; - info.hugetlbtotal = h->nr_huge_pages; - - if (copy_to_user((void __user *)arg, &info, sizeof(info))) - return -EFAULT; - - pr_info("svm get hugetlb info: order(%u), max_huge_pages(%lu)," - "nr_huge_pages(%lu), free_huge_pages(%lu), resv_huge_pages(%lu)", - h->order, - h->max_huge_pages, - h->nr_huge_pages, - h->free_huge_pages, - h->resv_huge_pages); - - return 0; -} - -static void svm_get_node_memory_info_inc(unsigned long nid, struct phymeminfo *info) -{ - struct sysinfo i; - struct hstate *h = &default_hstate; - unsigned long huge_free = 0; - unsigned long huge_total = 0; - - if (hugepages_supported()) { - huge_free = h->free_huge_pages_node[nid] * (PAGE_SIZE << huge_page_order(h)); - huge_total = h->nr_huge_pages_node[nid] * (PAGE_SIZE << huge_page_order(h)); - } - -#ifdef CONFIG_NUMA - si_meminfo_node(&i, nid); -#else - si_meminfo(&i); -#endif - info->normal_free += i.freeram * PAGE_SIZE; - info->normal_total += i.totalram * PAGE_SIZE - huge_total; - info->huge_total += huge_total; - info->huge_free += huge_free; -} - -static void __svm_get_memory_info(unsigned long nodemask, struct phymeminfo *info) -{ - memset(info, 0x0, sizeof(struct phymeminfo)); - - nodemask = nodemask & ((1UL << MAX_NUMNODES) - 1); - - while (nodemask) { - unsigned long nid = find_first_bit(&nodemask, BITS_PER_LONG); - if (node_isset(nid, node_online_map)) { - (void)svm_get_node_memory_info_inc(nid, info); - } - - nodemask &= ~(1UL << nid); - } -} - -static long svm_get_phy_memory_info(unsigned long __user *arg) -{ - struct phymeminfo info; - struct phymeminfo_ioctl para; - - if (arg == NULL) - return -EINVAL; - - if (copy_from_user(¶, (void __user *)arg, sizeof(para))) - return -EFAULT; - - __svm_get_memory_info(para.nodemask, &info); - - if (copy_to_user((void __user 
*)para.info, &info, sizeof(info))) - return -EFAULT; - - return 0; -} - static long svm_remap_get_phys(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long *phys, unsigned long *page_size, unsigned long *offset) @@ -2092,12 +1980,6 @@ static long svm_ioctl(struct file *file, unsigned int cmd, case SVM_IOCTL_UNPIN_MEMORY: err = svm_unpin_memory((unsigned long __user *)arg); break; - case SVM_IOCTL_GETHUGEINFO: - err = svm_get_hugeinfo((unsigned long __user *)arg); - break; - case SVM_IOCTL_GET_PHYMEMINFO: - err = svm_get_phy_memory_info((unsigned long __user *)arg); - break; case SVM_IOCTL_REMAP_PROC: err = svm_remap_proc((unsigned long __user *)arg); break; -- Gitee From 6fe391e5c3b97d2e60fcaad102ab96852a5e033b Mon Sep 17 00:00:00 2001 From: Lijun Fang Date: Thu, 14 Apr 2022 09:33:48 +0000 Subject: [PATCH 197/340] svm: Delete unused function sysrq_sched_debug_show_export ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4UR5Y CVE: NA -------------------- Delete unused function sysrq_sched_debug_show_export Signed-off-by: Lijun Fang Reviewed-by: Weilong Chen Signed-off-by: Yongqiang Liu --- drivers/char/svm.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/drivers/char/svm.c b/drivers/char/svm.c index c48cbce39661..864cb79c6738 100644 --- a/drivers/char/svm.c +++ b/drivers/char/svm.c @@ -163,8 +163,6 @@ static char *svm_cmd_to_string(unsigned int cmd) return NULL; } -extern void sysrq_sched_debug_tidy(void); - /* * image word of slot * SVM_IMAGE_WORD_INIT: initial value, indicating that the slot is not used. @@ -423,17 +421,6 @@ static int svm_va2pa_trunk_init(struct device *dev) return 0; } -void sysrq_sched_debug_show_export(void) -{ -#ifdef CONFIG_SCHED_DEBUG - sysrq_sched_debug_tidy(); -#else - pr_err("Not open CONFIG_SCHED_DEBUG\n"); -#endif - panic("pcie heart miss\n"); -} -EXPORT_SYMBOL(sysrq_sched_debug_show_export); - static struct svm_process *find_svm_process(unsigned long asid) { struct rb_node *node = svm_process_root.rb_node; -- Gitee From 492bf18ea8aac80805383a4b898ac7e999526d9d Mon Sep 17 00:00:00 2001 From: Lijun Fang Date: Thu, 14 Apr 2022 09:33:49 +0000 Subject: [PATCH 198/340] ascend: mm: Add MAP_ALIGN flag to map aligned va ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4UR5Y CVE: NA -------------------- svm module use svm_get_unmapped_area ops to map an aligned va, which used by mapping l2buf memory. The svm_get_unmapped_area use a lot of duplicated codes, we add MAP_ALIGN to adjust mapinfo alignmask. 
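A worked example of the align_mask arithmetic used in the mmap change below, assuming 4 KiB pages and a power-of-two mapping length (the intended l2buf case): with this mask, vm_unmapped_area() returns a start address whose low bits are clear, i.e. a start that is a multiple of len.

  #include <stdio.h>

  #define PAGE_SHIFT 12

  int main(void)
  {
          unsigned long len = 16UL << 20;               /* 16 MiB mapping */
          unsigned long mask = ((len >> PAGE_SHIFT) - 1) << PAGE_SHIFT;
          unsigned long addr = 0x7f4000000000UL;        /* candidate start */

          printf("align_mask = %#lx\n", mask);          /* 0xfff000 */
          printf("aligned    = %d\n", (addr & mask) == 0);
          printf("addr %% len = %lu\n", addr % len);    /* 0: start is len-aligned */
          return 0;
  }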
Signed-off-by: Lijun Fang Reviewed-by: Kefeng Wang Reviewed-by: Weilong Chen Signed-off-by: Yongqiang Liu --- arch/alpha/include/uapi/asm/mman.h | 1 + arch/mips/include/uapi/asm/mman.h | 1 + arch/parisc/include/uapi/asm/mman.h | 1 + arch/powerpc/include/uapi/asm/mman.h | 1 + arch/sparc/include/uapi/asm/mman.h | 1 + arch/xtensa/include/uapi/asm/mman.h | 2 ++ include/uapi/asm-generic/mman.h | 2 ++ mm/mmap.c | 8 ++++++++ 8 files changed, 17 insertions(+) diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h index 1c7ce2716ad3..977df8306905 100644 --- a/arch/alpha/include/uapi/asm/mman.h +++ b/arch/alpha/include/uapi/asm/mman.h @@ -35,6 +35,7 @@ #define MAP_FIXED_NOREPLACE 0x200000/* MAP_FIXED which doesn't unmap underlying mapping */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ #define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */ +#define MAP_ALIGN 0x2000000 /* create an aligned mapping */ #define MS_ASYNC 1 /* sync memory asynchronously */ #define MS_SYNC 2 /* synchronous memory sync */ diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h index 4570a54ac1d9..1a6ffb5515ed 100644 --- a/arch/mips/include/uapi/asm/mman.h +++ b/arch/mips/include/uapi/asm/mman.h @@ -53,6 +53,7 @@ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ #define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */ +#define MAP_ALIGN 0x2000000 /* create an aligned mapping */ /* * Flags for msync diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h index 06857eb1bee8..8d88c457fe10 100644 --- a/arch/parisc/include/uapi/asm/mman.h +++ b/arch/parisc/include/uapi/asm/mman.h @@ -29,6 +29,7 @@ #define MAP_FIXED_NOREPLACE 0x100000 /* MAP_FIXED which doesn't unmap underlying mapping */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ #define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */ +#define MAP_ALIGN 0x2000000 /* create an aligned mapping */ #define MS_SYNC 1 /* synchronous memory sync */ #define MS_ASYNC 2 /* sync memory asynchronously */ diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h index 24354f792b00..802cc3fe1358 100644 --- a/arch/powerpc/include/uapi/asm/mman.h +++ b/arch/powerpc/include/uapi/asm/mman.h @@ -31,6 +31,7 @@ #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ #define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */ +#define MAP_ALIGN 0x2000000 /* create an aligned mapping */ /* Override any generic PKEY permission defines */ #define PKEY_DISABLE_EXECUTE 0x4 diff --git a/arch/sparc/include/uapi/asm/mman.h b/arch/sparc/include/uapi/asm/mman.h index 214abe17f44a..953528235c2e 100644 --- a/arch/sparc/include/uapi/asm/mman.h +++ b/arch/sparc/include/uapi/asm/mman.h @@ -28,6 +28,7 @@ #define MAP_HUGETLB 0x40000 /* create a huge page mapping */ #define MAP_PA32BIT 0x400000 /* physical address is within 4G */ #define MAP_CHECKNODE 0x800000 /* hugetlb numa node check */ +#define MAP_ALIGN 0x2000000 /* create an aligned mapping */ #endif /* _UAPI__SPARC_MMAN_H__ */ diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h index 2c9d70560238..97239cce3ffc 100644 --- a/arch/xtensa/include/uapi/asm/mman.h +++ b/arch/xtensa/include/uapi/asm/mman.h @@ -67,6 +67,8 @@ # define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ 
#endif +#define MAP_ALIGN 0x2000000 /* create an aligned mapping */ + /* * Flags for msync */ diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h index defdf92911c3..028b4e846185 100644 --- a/include/uapi/asm-generic/mman.h +++ b/include/uapi/asm-generic/mman.h @@ -21,6 +21,8 @@ #define MAP_REPLACE 0x1000000 #endif +#define MAP_ALIGN 0x2000000 /* create an aligned mapping */ + /* Bits [26:31] are reserved, see mman-common.h for MAP_HUGETLB usage */ #define MCL_CURRENT 1 /* lock all current mappings */ diff --git a/mm/mmap.c b/mm/mmap.c index e529384b8ac6..ce3fba7c31cb 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2382,6 +2382,10 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr, sp_area_work_around(&info, flags); + /* Align vmstart to len */ + if (filp && (flags & MAP_ALIGN)) + info.align_mask = ((len >> PAGE_SHIFT) - 1) << PAGE_SHIFT; + return vm_unmapped_area(&info); } #endif @@ -2437,6 +2441,10 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0, sp_area_work_around(&info, flags); + /* Align vmstart to len */ + if (filp && (flags & MAP_ALIGN)) + info.align_mask = ((len >> PAGE_SHIFT) - 1) << PAGE_SHIFT; + addr = vm_unmapped_area(&info); /* -- Gitee From 0e5d94f9e3f905f9faf3ca12280df300271c80a8 Mon Sep 17 00:00:00 2001 From: Lijun Fang Date: Thu, 14 Apr 2022 09:33:50 +0000 Subject: [PATCH 199/340] svm: Delete unused svm_get_unmapped_area ops ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4UR5Y CVE: NA -------------------- svm_get_unmapped_area will be no longer used. Signed-off-by: Lijun Fang Reviewed-by: Weilong Chen Signed-off-by: Yongqiang Liu --- drivers/char/svm.c | 60 ---------------------------------------------- 1 file changed, 60 deletions(-) diff --git a/drivers/char/svm.c b/drivers/char/svm.c index 864cb79c6738..1f24328b3504 100644 --- a/drivers/char/svm.c +++ b/drivers/char/svm.c @@ -1685,65 +1685,6 @@ static int svm_proc_load_flag(int __user *arg) return put_user(flag, arg); } -static unsigned long svm_get_unmapped_area(struct file *file, - unsigned long addr0, unsigned long len, - unsigned long pgoff, unsigned long flags) -{ - unsigned long addr = addr0; - struct mm_struct *mm = current->mm; - struct vm_unmapped_area_info info; - struct svm_device *sdev = file_to_sdev(file); - - if (!acpi_disabled) - return -EPERM; - - if (flags & MAP_FIXED) { - if (IS_ALIGNED(addr, len)) - return addr; - - dev_err(sdev->dev, "MAP_FIXED but not aligned\n"); - return -EINVAL; //lint !e570 - } - - if (addr) { - struct vm_area_struct *vma = NULL; - - addr = ALIGN(addr, len); - - if (dvpp_mmap_check(addr, len, flags)) - return -ENOMEM; - - vma = find_vma(mm, addr); - if (TASK_SIZE - len >= addr && addr >= mmap_min_addr && - (vma == NULL || addr + len <= vm_start_gap(vma))) - return addr; - } - - info.flags = VM_UNMAPPED_AREA_TOPDOWN; - info.length = len; - info.low_limit = max(PAGE_SIZE, mmap_min_addr); - info.high_limit = ((mm->mmap_base <= DVPP_MMAP_BASE) ? 
- mm->mmap_base : DVPP_MMAP_BASE); - info.align_mask = ((len >> PAGE_SHIFT) - 1) << PAGE_SHIFT; - info.align_offset = pgoff << PAGE_SHIFT; - - addr = vm_unmapped_area(&info); - - if (offset_in_page(addr)) { - VM_BUG_ON(addr != -ENOMEM); - info.flags = 0; - info.low_limit = TASK_UNMAPPED_BASE; - info.high_limit = DVPP_MMAP_BASE; - - if (enable_mmap_dvpp) - dvpp_mmap_get_area(&info, flags); - - addr = vm_unmapped_area(&info); - } - - return addr; -} - static int svm_mmap(struct file *file, struct vm_area_struct *vma) { int err; @@ -1997,7 +1938,6 @@ static const struct file_operations svm_fops = { .owner = THIS_MODULE, .open = svm_open, .mmap = svm_mmap, - .get_unmapped_area = svm_get_unmapped_area, .unlocked_ioctl = svm_ioctl, }; -- Gitee From 8f2269877eef6b5a36c8109280ee5fd10dcaff51 Mon Sep 17 00:00:00 2001 From: Lijun Fang Date: Thu, 14 Apr 2022 09:33:51 +0000 Subject: [PATCH 200/340] svm: Change svm to modules ascend inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4UR5Y CVE: NA -------------------- Change svm to modules by default. Signed-off-by: Lijun Fang Reviewed-by: Weilong Chen Signed-off-by: Yongqiang Liu --- drivers/char/Kconfig | 4 ++-- drivers/char/svm.c | 36 +++++++++++++++++------------------- 2 files changed, 19 insertions(+), 21 deletions(-) diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 505562acf275..d6bb0a4bde56 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -553,9 +553,9 @@ config ADI driver include crash and makedumpfile. config HISI_SVM - bool "Hisilicon svm driver" + tristate "Hisilicon svm driver" depends on ARM64 && ARM_SMMU_V3 && MMU_NOTIFIER && HUGETLBFS - default y + default m help This driver provides character-level access to Hisilicon SVM chipset. Typically, you can bind a task to the diff --git a/drivers/char/svm.c b/drivers/char/svm.c index 1f24328b3504..9f98f57d562a 100644 --- a/drivers/char/svm.c +++ b/drivers/char/svm.c @@ -500,7 +500,7 @@ static int svm_remove_core(struct device *dev, void *data) { struct core_device *cdev = to_core_device(dev); - if (!cdev->smmu_bypass) { + if (!cdev->smmu_bypass && cdev->group && cdev->domain) { iommu_sva_device_shutdown(dev); iommu_detach_group(cdev->domain, cdev->group); iommu_group_put(cdev->group); @@ -928,11 +928,7 @@ static struct task_struct *svm_get_task(struct svm_bind_process params) if (params.flags & SVM_BIND_PID) { struct mm_struct *mm = NULL; - rcu_read_lock(); - task = find_task_by_vpid(params.vpid); - if (task) - get_task_struct(task); - rcu_read_unlock(); + task = find_get_task_by_vpid(params.vpid); if (task == NULL) return ERR_PTR(-ESRCH); @@ -1039,6 +1035,7 @@ static int svm_acpi_add_core(struct svm_device *sdev, struct core_device *cdev = NULL; char *name = NULL; enum dev_dma_attr attr; + const union acpi_object *obj; name = devm_kasprintf(sdev->dev, GFP_KERNEL, "svm_child_dev%d", id); if (name == NULL) @@ -1063,7 +1060,7 @@ static int svm_acpi_add_core(struct svm_device *sdev, return err; } - attr = acpi_get_dma_attr(children); + attr = device_get_dma_attr(&cdev->dev); if (attr != DEV_DMA_NOT_SUPPORTED) { err = acpi_dma_configure(&cdev->dev, attr); if (err) { @@ -1072,12 +1069,14 @@ static int svm_acpi_add_core(struct svm_device *sdev, } } - err = acpi_dev_prop_read_single(children, "hisi,smmu-bypass", - DEV_PROP_U8, &cdev->smmu_bypass); + err = acpi_dev_get_property(children, "hisi,smmu-bypass", + DEV_PROP_U8, &obj); if (err) { dev_info(&children->dev, "read smmu bypass failed\n"); } + cdev->smmu_bypass = (u8)obj->integer.value; + 
cdev->group = iommu_group_get(&cdev->dev); if (IS_ERR_OR_NULL(cdev->group)) { dev_err(&cdev->dev, "smmu is not right configured\n"); @@ -1296,7 +1295,7 @@ static pte_t *svm_get_pte(struct vm_area_struct *vma, if (is_vm_hugetlb_page(vma)) { if (pud_present(*pud)) { - if (pud_huge(*pud)) { + if (pud_val(*pud) && !(pud_val(*pud) & PUD_TABLE_BIT)) { pte = (pte_t *)pud; *offset = addr & (PUD_SIZE - 1); size = PUD_SIZE; @@ -1347,11 +1346,11 @@ static pte_t *svm_walk_pt(unsigned long addr, unsigned long *page_size, return NULL; pgd = pgd_offset(mm, addr); - if (pgd_none_or_clear_bad(pgd)) + if (pgd_none(*pgd)) return NULL; pud = pud_offset(pgd, addr); - if (pud_none_or_clear_bad(pud)) + if (pud_none(*pud)) return NULL; return svm_get_pte(vma, pud, addr, page_size, offset); @@ -1540,11 +1539,11 @@ static long svm_remap_get_phys(struct mm_struct *mm, struct vm_area_struct *vma, return err; pgd = pgd_offset(mm, addr); - if (pgd_none_or_clear_bad(pgd)) + if (pgd_none(*pgd)) return err; pud = pud_offset(pgd, addr); - if (pud_none_or_clear_bad(pud)) + if (pud_none(*pud)) return err; pte = svm_get_pte(vma, pud, addr, page_size, offset); @@ -1585,20 +1584,19 @@ static long svm_remap_proc(unsigned long __user *arg) return -EINVAL; } - rcu_read_lock(); if (pmem.pid) { - ptask = find_task_by_vpid(pmem.pid); + ptask = find_get_task_by_vpid(pmem.pid); if (!ptask) { - rcu_read_unlock(); pr_err("No task for this pid\n"); return -EINVAL; } } else { + rcu_read_lock(); ptask = current; + get_task_struct(ptask); + rcu_read_unlock(); } - get_task_struct(ptask); - rcu_read_unlock(); pmm = ptask->mm; down_read(&mm->mmap_sem); -- Gitee From 94e056c86418ba4118e1252b0ac3fa6e1ff0ea94 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 15 Apr 2022 14:44:05 +0800 Subject: [PATCH 201/340] binder: use euid from cred instead of using task stable inclusion from linux-4.19.218 commit 5d40061285b81a7e213dc9b37acc4a0545eedf32 -------------------------------- commit 29bc22ac5e5bc63275e850f0c8fc549e3d0e306b upstream. Save the 'struct cred' associated with a binder process at initial open to avoid potential race conditions when converting to an euid. Set a transaction's sender_euid from the 'struct cred' saved at binder_open() instead of looking up the euid from the binder proc's 'struct task'. This ensures the euid is associated with the security context that of the task that opened binder. 
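As a generic illustration of the pattern (the foo_* names are made up; only the binder change in the diff below is real): a driver that needs the opener's identity later pins the cred at open time and reads the euid from it, instead of deriving it from a task at transaction time.

  #include <linux/cred.h>
  #include <linux/fs.h>
  #include <linux/slab.h>

  struct foo_proc {
          const struct cred *cred;      /* pinned at open, invariant afterwards */
  };

  static int foo_open(struct inode *inode, struct file *filp)
  {
          struct foo_proc *proc = kzalloc(sizeof(*proc), GFP_KERNEL);

          if (!proc)
                  return -ENOMEM;
          /* take a reference on the credentials the file was opened with */
          proc->cred = get_cred(filp->f_cred);
          filp->private_data = proc;
          return 0;
  }

  static kuid_t foo_sender_euid(struct foo_proc *proc)
  {
          /* the saved cred reflects whoever opened the device, while
           * task_euid() on a stored task would follow later setuid()/exec
           * of that task and can race with the permission check */
          return proc->cred->euid;
  }

  static int foo_release(struct inode *inode, struct file *filp)
  {
          struct foo_proc *proc = filp->private_data;

          put_cred(proc->cred);         /* drop the reference taken at open */
          kfree(proc);
          return 0;
  }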
Cc: stable@vger.kernel.org # 4.4+ Fixes: 457b9a6f09f0 ("Staging: android: add binder driver") Signed-off-by: Todd Kjos Suggested-by: Stephen Smalley Suggested-by: Jann Horn Acked-by: Casey Schaufler Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/android/binder.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 18c32637047f..3ba23d29f50a 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -483,6 +483,9 @@ enum binder_deferred_state { * @files files_struct for process * (protected by @files_lock) * @files_lock mutex to protect @files + * @cred struct cred associated with the `struct file` + * in binder_open() + * (invariant after initialized) * @deferred_work_node: element for binder_deferred_list * (protected by binder_deferred_lock) * @deferred_work: bitmap of deferred work to perform @@ -529,6 +532,7 @@ struct binder_proc { struct task_struct *tsk; struct files_struct *files; struct mutex files_lock; + const struct cred *cred; struct hlist_node deferred_work_node; int deferred_work; bool is_dead; @@ -2956,7 +2960,7 @@ static void binder_transaction(struct binder_proc *proc, t->from = thread; else t->from = NULL; - t->sender_euid = task_euid(proc->tsk); + t->sender_euid = proc->cred->euid; t->to_proc = target_proc; t->to_thread = target_thread; t->code = tr->code; @@ -4328,6 +4332,7 @@ static void binder_free_proc(struct binder_proc *proc) BUG_ON(!list_empty(&proc->delivered_death)); binder_alloc_deferred_release(&proc->alloc); put_task_struct(proc->tsk); + put_cred(proc->cred); binder_stats_deleted(BINDER_STAT_PROC); kfree(proc); } @@ -4786,6 +4791,7 @@ static int binder_open(struct inode *nodp, struct file *filp) get_task_struct(current->group_leader); proc->tsk = current->group_leader; mutex_init(&proc->files_lock); + proc->cred = get_cred(filp->f_cred); INIT_LIST_HEAD(&proc->todo); proc->default_priority = task_nice(current); binder_dev = container_of(filp->private_data, struct binder_device, -- Gitee From d1b9a67709f4a7ddeb01585718df846e08351977 Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 15 Apr 2022 14:44:06 +0800 Subject: [PATCH 202/340] binder: use cred instead of task for selinux checks stable inclusion from linux-4.19.218 commit e82f3f9638f17d58e9a217bce127e2376aefcb9d -------------------------------- commit 52f88693378a58094c538662ba652aff0253c4fe upstream. Since binder was integrated with selinux, it has passed 'struct task_struct' associated with the binder_proc to represent the source and target of transactions. The conversion of task to SID was then done in the hook implementations. It turns out that there are race conditions which can result in an incorrect security context being used. Fix by using the 'struct cred' saved during binder_open and pass it to the selinux subsystem. 
Cc: stable@vger.kernel.org # 5.14 (need backport for earlier stables) Fixes: 79af73079d75 ("Add security hooks to binder and implement the hooks for SELinux.") Suggested-by: Jann Horn Signed-off-by: Todd Kjos Acked-by: Casey Schaufler Signed-off-by: Paul Moore Signed-off-by: Greg Kroah-Hartman conflicts: drivers/android/binder.c Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/android/binder.c | 12 ++++++------ include/linux/lsm_hooks.h | 28 ++++++++++++++-------------- include/linux/security.h | 28 ++++++++++++++-------------- security/security.c | 14 +++++++------- security/selinux/hooks.c | 36 +++++++++++++++--------------------- 5 files changed, 56 insertions(+), 62 deletions(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 3ba23d29f50a..10735051785d 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2337,7 +2337,7 @@ static int binder_translate_binder(struct flat_binder_object *fp, ret = -EINVAL; goto done; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { ret = -EPERM; goto done; } @@ -2383,7 +2383,7 @@ static int binder_translate_handle(struct flat_binder_object *fp, proc->pid, thread->pid, fp->handle); return -EINVAL; } - if (security_binder_transfer_binder(proc->tsk, target_proc->tsk)) { + if (security_binder_transfer_binder(proc->cred, target_proc->cred)) { ret = -EPERM; goto done; } @@ -2467,7 +2467,7 @@ static int binder_translate_fd(int fd, ret = -EBADF; goto err_fget; } - ret = security_binder_transfer_file(proc->tsk, target_proc->tsk, file); + ret = security_binder_transfer_file(proc->cred, target_proc->cred, file); if (ret < 0) { ret = -EPERM; goto err_security; @@ -2845,8 +2845,8 @@ static void binder_transaction(struct binder_proc *proc, goto err_dead_binder; } e->to_node = target_node->debug_id; - if (security_binder_transaction(proc->tsk, - target_proc->tsk) < 0) { + if (security_binder_transaction(proc->cred, + target_proc->cred) < 0) { return_error = BR_FAILED_REPLY; return_error_param = -EPERM; return_error_line = __LINE__; @@ -4536,7 +4536,7 @@ static int binder_ioctl_set_ctx_mgr(struct file *filp) ret = -EBUSY; goto out; } - ret = security_binder_set_context_mgr(proc->tsk); + ret = security_binder_set_context_mgr(proc->cred); if (ret < 0) goto out; if (uid_valid(context->binder_context_mgr_uid)) { diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 97a020c616ad..d17ea5358368 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1211,22 +1211,22 @@ * * @binder_set_context_mgr: * Check whether @mgr is allowed to be the binder context manager. - * @mgr contains the task_struct for the task being registered. + * @mgr contains the struct cred for the current binder process. * Return 0 if permission is granted. * @binder_transaction: * Check whether @from is allowed to invoke a binder transaction call * to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. + * @to contains the struct cred for the receiving process. * @binder_transfer_binder: * Check whether @from is allowed to transfer a binder reference to @to. - * @from contains the task_struct for the sending task. - * @to contains the task_struct for the receiving task. + * @from contains the struct cred for the sending process. 
+ * @to contains the struct cred for the receiving process. * @binder_transfer_file: * Check whether @from is allowed to transfer @file to @to. - * @from contains the task_struct for the sending task. + * @from contains the struct cred for the sending process. * @file contains the struct file being transferred. - * @to contains the task_struct for the receiving task. + * @to contains the struct cred for the receiving process. * * @ptrace_access_check: * Check permission before allowing the current process to trace the @@ -1428,13 +1428,13 @@ * */ union security_list_options { - int (*binder_set_context_mgr)(struct task_struct *mgr); - int (*binder_transaction)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_binder)(struct task_struct *from, - struct task_struct *to); - int (*binder_transfer_file)(struct task_struct *from, - struct task_struct *to, + int (*binder_set_context_mgr)(const struct cred *mgr); + int (*binder_transaction)(const struct cred *from, + const struct cred *to); + int (*binder_transfer_binder)(const struct cred *from, + const struct cred *to); + int (*binder_transfer_file)(const struct cred *from, + const struct cred *to, struct file *file); int (*ptrace_access_check)(struct task_struct *child, diff --git a/include/linux/security.h b/include/linux/security.h index 75f4156c84d7..9eb1c7a0f280 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -216,13 +216,13 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) extern int security_init(void); /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr); -int security_binder_transaction(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to); -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file); +int security_binder_set_context_mgr(const struct cred *mgr); +int security_binder_transaction(const struct cred *from, + const struct cred *to); +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to); +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file); int security_ptrace_access_check(struct task_struct *child, unsigned int mode); int security_ptrace_traceme(struct task_struct *parent); int security_capget(struct task_struct *target, @@ -439,25 +439,25 @@ static inline int security_init(void) return 0; } -static inline int security_binder_set_context_mgr(struct task_struct *mgr) +static inline int security_binder_set_context_mgr(const struct cred *mgr) { return 0; } -static inline int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transaction(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static inline int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return 0; } -static inline int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static inline int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return 0; diff --git a/security/security.c b/security/security.c index 9e4d6c999c79..8a9e1ececd7d 100644 --- a/security/security.c +++ b/security/security.c @@ -232,25 +232,25 @@ 
EXPORT_SYMBOL(unregister_lsm_notifier); /* Security operations */ -int security_binder_set_context_mgr(struct task_struct *mgr) +int security_binder_set_context_mgr(const struct cred *mgr) { return call_int_hook(binder_set_context_mgr, 0, mgr); } -int security_binder_transaction(struct task_struct *from, - struct task_struct *to) +int security_binder_transaction(const struct cred *from, + const struct cred *to) { return call_int_hook(binder_transaction, 0, from, to); } -int security_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +int security_binder_transfer_binder(const struct cred *from, + const struct cred *to) { return call_int_hook(binder_transfer_binder, 0, from, to); } -int security_binder_transfer_file(struct task_struct *from, - struct task_struct *to, struct file *file) +int security_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { return call_int_hook(binder_transfer_file, 0, from, to, file); } diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 25353b4975c9..056f5de53e7e 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -2218,22 +2218,19 @@ static inline u32 open_file_to_av(struct file *file) /* Hook functions begin here. */ -static int selinux_binder_set_context_mgr(struct task_struct *mgr) +static int selinux_binder_set_context_mgr(const struct cred *mgr) { - u32 mysid = current_sid(); - u32 mgrsid = task_sid(mgr); - return avc_has_perm(&selinux_state, - mysid, mgrsid, SECCLASS_BINDER, + current_sid(), cred_sid(mgr), SECCLASS_BINDER, BINDER__SET_CONTEXT_MGR, NULL); } -static int selinux_binder_transaction(struct task_struct *from, - struct task_struct *to) +static int selinux_binder_transaction(const struct cred *from, + const struct cred *to) { u32 mysid = current_sid(); - u32 fromsid = task_sid(from); - u32 tosid = task_sid(to); + u32 fromsid = cred_sid(from); + u32 tosid = cred_sid(to); int rc; if (mysid != fromsid) { @@ -2244,27 +2241,24 @@ static int selinux_binder_transaction(struct task_struct *from, return rc; } - return avc_has_perm(&selinux_state, - fromsid, tosid, SECCLASS_BINDER, BINDER__CALL, - NULL); + return avc_has_perm(&selinux_state, fromsid, tosid, + SECCLASS_BINDER, BINDER__CALL, NULL); } -static int selinux_binder_transfer_binder(struct task_struct *from, - struct task_struct *to) +static int selinux_binder_transfer_binder(const struct cred *from, + const struct cred *to) { - u32 fromsid = task_sid(from); - u32 tosid = task_sid(to); - return avc_has_perm(&selinux_state, - fromsid, tosid, SECCLASS_BINDER, BINDER__TRANSFER, + cred_sid(from), cred_sid(to), + SECCLASS_BINDER, BINDER__TRANSFER, NULL); } -static int selinux_binder_transfer_file(struct task_struct *from, - struct task_struct *to, +static int selinux_binder_transfer_file(const struct cred *from, + const struct cred *to, struct file *file) { - u32 sid = task_sid(to); + u32 sid = cred_sid(to); struct file_security_struct *fsec = file->f_security; struct dentry *dentry = file->f_path.dentry; struct inode_security_struct *isec; -- Gitee From a88267d538c01e97ba50921983de6afdc9bf4b1c Mon Sep 17 00:00:00 2001 From: Todd Kjos Date: Fri, 15 Apr 2022 14:44:07 +0800 Subject: [PATCH 203/340] binder: fix test regression due to sender_euid change stable inclusion from linux-4.19.219 commit c3b9f29fca6682550d731c80745b421415c1e0af -------------------------------- commit c21a80ca0684ec2910344d72556c816cb8940c01 upstream. 
This is a partial revert of commit 29bc22ac5e5b ("binder: use euid from cred instead of using task"). Setting sender_euid using proc->cred caused some Android system test regressions that need further investigation. It is a partial reversion because subsequent patches rely on proc->cred. Fixes: 29bc22ac5e5b ("binder: use euid from cred instead of using task") Cc: stable@vger.kernel.org # 4.4+ Acked-by: Christian Brauner Signed-off-by: Todd Kjos Change-Id: I9b1769a3510fed250bb21859ef8beebabe034c66 Link: https://lore.kernel.org/r/20211112180720.2858135-1-tkjos@google.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/android/binder.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 10735051785d..0654133efbe4 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -2960,7 +2960,7 @@ static void binder_transaction(struct binder_proc *proc, t->from = thread; else t->from = NULL; - t->sender_euid = proc->cred->euid; + t->sender_euid = task_euid(proc->tsk); t->to_proc = target_proc; t->to_thread = target_thread; t->code = tr->code; -- Gitee From 4fd62a05a608e8c50258f6eafbc6f852def5c52c Mon Sep 17 00:00:00 2001 From: Simon Leiner Date: Fri, 15 Apr 2022 14:44:08 +0800 Subject: [PATCH 204/340] xen/xenbus: Fix granting of vmalloc'd memory stable inclusion from linux-4.19.144 commit 47eb291ba65bfade197e73ee13610d97809cb087 -------------------------------- [ Upstream commit d742db70033c745e410523e00522ee0cfe2aa416 ] On some architectures (like ARM), virt_to_gfn cannot be used for vmalloc'd memory because of its reliance on virt_to_phys. This patch introduces a check for vmalloc'd addresses and obtains the PFN using vmalloc_to_pfn in that case. Signed-off-by: Simon Leiner Reviewed-by: Stefano Stabellini Link: https://lore.kernel.org/r/20200825093153.35500-1-simon@leiner.me Signed-off-by: Juergen Gross Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/xen/xenbus/xenbus_client.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 6f6f6db94838..9955892d1ea6 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -371,8 +371,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, int i, j; for (i = 0; i < nr_pages; i++) { - err = gnttab_grant_foreign_access(dev->otherend_id, - virt_to_gfn(vaddr), 0); + unsigned long gfn; + + if (is_vmalloc_addr(vaddr)) + gfn = pfn_to_gfn(vmalloc_to_pfn(vaddr)); + else + gfn = virt_to_gfn(vaddr); + + err = gnttab_grant_foreign_access(dev->otherend_id, gfn, 0); if (err < 0) { xenbus_dev_fatal(dev, err, "granting access to ring page"); -- Gitee From 85586d4f7759da6b1903bfba4b7cd718995f4aca Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:09 +0800 Subject: [PATCH 205/340] xen/xenbus: don't let xenbus_grant_ring() remove grants in error case stable inclusion from linux-4.19.234 commit 8d521d960aef22781ff499e16899c30af899de8d -------------------------------- Commit 3777ea7bac3113005b7180e6b9dadf16d19a5827 upstream. Letting xenbus_grant_ring() tear down grants in the error case is problematic, as the other side could already have used these grants. 
Calling gnttab_end_foreign_access_ref() without checking success is resulting in an unclear situation for any caller of xenbus_grant_ring() as in the error case the memory pages of the ring page might be partially mapped. Freeing them would risk unwanted foreign access to them, while not freeing them would leak memory. In order to remove the need to undo any gnttab_grant_foreign_access() calls, use gnttab_alloc_grant_references() to make sure no further error can occur in the loop granting access to the ring pages. It should be noted that this way of handling removes leaking of grant entries in the error case, too. This is CVE-2022-23040 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/xen/xenbus/xenbus_client.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c index 9955892d1ea6..365cf9f5c967 100644 --- a/drivers/xen/xenbus/xenbus_client.c +++ b/drivers/xen/xenbus/xenbus_client.c @@ -368,7 +368,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, unsigned int nr_pages, grant_ref_t *grefs) { int err; - int i, j; + unsigned int i; + grant_ref_t gref_head; + + err = gnttab_alloc_grant_references(nr_pages, &gref_head); + if (err) { + xenbus_dev_fatal(dev, err, "granting access to ring page"); + return err; + } for (i = 0; i < nr_pages; i++) { unsigned long gfn; @@ -378,23 +385,14 @@ int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, else gfn = virt_to_gfn(vaddr); - err = gnttab_grant_foreign_access(dev->otherend_id, gfn, 0); - if (err < 0) { - xenbus_dev_fatal(dev, err, - "granting access to ring page"); - goto fail; - } - grefs[i] = err; + grefs[i] = gnttab_claim_grant_reference(&gref_head); + gnttab_grant_foreign_access_ref(grefs[i], dev->otherend_id, + gfn, 0); vaddr = vaddr + XEN_PAGE_SIZE; } return 0; - -fail: - for (j = 0; j < i; j++) - gnttab_end_foreign_access_ref(grefs[j], 0); - return err; } EXPORT_SYMBOL_GPL(xenbus_grant_ring); -- Gitee From efdc8ee28b9e821948ce8f7afb0047f9358b57c5 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:10 +0800 Subject: [PATCH 206/340] xen/grant-table: add gnttab_try_end_foreign_access() stable inclusion from linux-4.19.234 commit 17659846fe336366b1663194f5669d10f5947f53 -------------------------------- Commit 6b1775f26a2da2b05a6dc8ec2b5d14e9a4701a1a upstream. Add a new grant table function gnttab_try_end_foreign_access(), which will remove and free a grant if it is not in use. Its main use case is to either free a grant if it is no longer in use, or to take some other action if it is still in use. This other action can be an error exit, or (e.g. in the case of blkfront persistent grant feature) some special handling. This is CVE-2022-23036, CVE-2022-23038 / part of XSA-396. 
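A sketch of the calling pattern the new helper is meant for, based on the description above; the wrapper name, the page argument and the error reaction are illustrative assumptions, not code from this series.

  #include <linux/errno.h>
  #include <linux/gfp.h>
  #include <linux/printk.h>
  #include <xen/grant_table.h>

  /* Revoke a grant handed to the backend; reclaim the page only if the
   * revoke actually succeeded. */
  static int foo_revoke_granted_page(grant_ref_t ref, struct page *page)
  {
          if (gnttab_try_end_foreign_access(ref)) {
                  /* grant entry freed, the page is ours again */
                  __free_page(page);
                  return 0;
          }

          /* still mapped by the backend: never reuse or free the page here */
          pr_alert("grant %u still in use by the backend\n", ref);
          return -EBUSY;
  }

blkfront's persistent-grant handling in the next patch illustrates the "some other action" case: a still-mapped persistent grant is kept on its list for reuse rather than freed.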
Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/xen/grant-table.c | 14 ++++++++++++-- include/xen/grant_table.h | 12 ++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 97341fa75458..9524806eb4b9 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -436,11 +436,21 @@ static void gnttab_add_deferred(grant_ref_t ref, bool readonly, what, ref, page ? page_to_pfn(page) : -1); } +int gnttab_try_end_foreign_access(grant_ref_t ref) +{ + int ret = _gnttab_end_foreign_access_ref(ref, 0); + + if (ret) + put_free_entry(ref); + + return ret; +} +EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access); + void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page) { - if (gnttab_end_foreign_access_ref(ref, readonly)) { - put_free_entry(ref); + if (gnttab_try_end_foreign_access(ref)) { if (page != 0) put_page(virt_to_page(page)); } else diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index a9978350b45b..7628ab25f686 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -97,10 +97,22 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * access has been ended, free the given page too. Access will be ended * immediately iff the grant entry is not in use, otherwise it will happen * some time later. page may be 0, in which case no freeing will occur. + * Note that the granted page might still be accessed (read or write) by the + * other side after gnttab_end_foreign_access() returns, so even if page was + * specified as 0 it is not allowed to just reuse the page for other + * purposes immediately. */ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page); +/* + * End access through the given grant reference, iff the grant entry is + * no longer in use. In case of success ending foreign access, the + * grant reference is deallocated. + * Return 1 if the grant entry was freed, 0 if it is still in use. + */ +int gnttab_try_end_foreign_access(grant_ref_t ref); + int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); -- Gitee From 3b5b044ddd9697f6f52d3e96612248c11db48684 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:11 +0800 Subject: [PATCH 207/340] xen/blkfront: don't use gnttab_query_foreign_access() for mapped status MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from linux-4.19.234 commit 423a3a50dce9a48d10d2d2a70cd2f78064c13703 -------------------------------- Commit abf1fd5919d6238ee3bc5eb4a9b6c3947caa6638 upstream. It isn't enough to check whether a grant is still being in use by calling gnttab_query_foreign_access(), as a mapping could be realized by the other side just after having called that function. In case the call was done in preparation of revoking a grant it is better to do so via gnttab_end_foreign_access_ref() and check the success of that operation instead. For the ring allocation use alloc_pages_exact() in order to avoid high order pages in case of a multi-page ring. If a grant wasn't unmapped by the backend without persistent grants being used, set the device state to "error". This is CVE-2022-23036 / part of XSA-396. 
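To make the race concrete, a before/after fragment under the semantics described above (illustrative only; 'ref' stands for a grant the frontend gave to the backend and is assumed to be declared in the surrounding code):

  /* old pattern: racy, the backend can map 'ref' between the query and
   * the revoke, so the page may still be foreign-mapped when reused */
  if (!gnttab_query_foreign_access(ref))
          gnttab_end_foreign_access(ref, 0, 0UL);

  /* new pattern: revoke and learn the outcome in a single step */
  if (gnttab_end_foreign_access_ref(ref, 0)) {
          /* revoked: the page may be reused or freed */
  } else {
          /* still mapped by the backend: treat it as an error */
  }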
Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Roger Pau Monné Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/block/xen-blkfront.c | 63 +++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 4e52917ebd31..0e451b17f33a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -1342,7 +1342,8 @@ static void blkif_free_ring(struct blkfront_ring_info *rinfo) rinfo->ring_ref[i] = GRANT_INVALID_REF; } } - free_pages((unsigned long)rinfo->ring.sring, get_order(info->nr_ring_pages * XEN_PAGE_SIZE)); + free_pages_exact(rinfo->ring.sring, + info->nr_ring_pages * XEN_PAGE_SIZE); rinfo->ring.sring = NULL; if (rinfo->irq) @@ -1426,9 +1427,15 @@ static int blkif_get_final_status(enum blk_req_status s1, return BLKIF_RSP_OKAY; } -static bool blkif_completion(unsigned long *id, - struct blkfront_ring_info *rinfo, - struct blkif_response *bret) +/* + * Return values: + * 1 response processed. + * 0 missing further responses. + * -1 error while processing. + */ +static int blkif_completion(unsigned long *id, + struct blkfront_ring_info *rinfo, + struct blkif_response *bret) { int i = 0; struct scatterlist *sg; @@ -1451,7 +1458,7 @@ static bool blkif_completion(unsigned long *id, /* Wait the second response if not yet here. */ if (s2->status < REQ_DONE) - return false; + return 0; bret->status = blkif_get_final_status(s->status, s2->status); @@ -1502,42 +1509,43 @@ static bool blkif_completion(unsigned long *id, } /* Add the persistent grant into the list of free grants */ for (i = 0; i < num_grant; i++) { - if (gnttab_query_foreign_access(s->grants_used[i]->gref)) { + if (!gnttab_try_end_foreign_access(s->grants_used[i]->gref)) { /* * If the grant is still mapped by the backend (the * backend has chosen to make this grant persistent) * we add it at the head of the list, so it will be * reused first. */ - if (!info->feature_persistent) - pr_alert_ratelimited("backed has not unmapped grant: %u\n", - s->grants_used[i]->gref); + if (!info->feature_persistent) { + pr_alert("backed has not unmapped grant: %u\n", + s->grants_used[i]->gref); + return -1; + } list_add(&s->grants_used[i]->node, &rinfo->grants); rinfo->persistent_gnts_c++; } else { /* - * If the grant is not mapped by the backend we end the - * foreign access and add it to the tail of the list, - * so it will not be picked again unless we run out of - * persistent grants. + * If the grant is not mapped by the backend we add it + * to the tail of the list, so it will not be picked + * again unless we run out of persistent grants. 
*/ - gnttab_end_foreign_access(s->grants_used[i]->gref, 0, 0UL); s->grants_used[i]->gref = GRANT_INVALID_REF; list_add_tail(&s->grants_used[i]->node, &rinfo->grants); } } if (s->req.operation == BLKIF_OP_INDIRECT) { for (i = 0; i < INDIRECT_GREFS(num_grant); i++) { - if (gnttab_query_foreign_access(s->indirect_grants[i]->gref)) { - if (!info->feature_persistent) - pr_alert_ratelimited("backed has not unmapped grant: %u\n", - s->indirect_grants[i]->gref); + if (!gnttab_try_end_foreign_access(s->indirect_grants[i]->gref)) { + if (!info->feature_persistent) { + pr_alert("backed has not unmapped grant: %u\n", + s->indirect_grants[i]->gref); + return -1; + } list_add(&s->indirect_grants[i]->node, &rinfo->grants); rinfo->persistent_gnts_c++; } else { struct page *indirect_page; - gnttab_end_foreign_access(s->indirect_grants[i]->gref, 0, 0UL); /* * Add the used indirect page back to the list of * available pages for indirect grefs. @@ -1552,7 +1560,7 @@ static bool blkif_completion(unsigned long *id, } } - return true; + return 1; } static irqreturn_t blkif_interrupt(int irq, void *dev_id) @@ -1618,12 +1626,17 @@ static irqreturn_t blkif_interrupt(int irq, void *dev_id) } if (bret.operation != BLKIF_OP_DISCARD) { + int ret; + /* * We may need to wait for an extra response if the * I/O request is split in 2 */ - if (!blkif_completion(&id, rinfo, &bret)) + ret = blkif_completion(&id, rinfo, &bret); + if (!ret) continue; + if (unlikely(ret < 0)) + goto err; } if (add_id_to_freelist(rinfo, id)) { @@ -1729,8 +1742,7 @@ static int setup_blkring(struct xenbus_device *dev, for (i = 0; i < info->nr_ring_pages; i++) rinfo->ring_ref[i] = GRANT_INVALID_REF; - sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, - get_order(ring_size)); + sring = alloc_pages_exact(ring_size, GFP_NOIO); if (!sring) { xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); return -ENOMEM; @@ -1740,7 +1752,7 @@ static int setup_blkring(struct xenbus_device *dev, err = xenbus_grant_ring(dev, rinfo->ring.sring, info->nr_ring_pages, gref); if (err < 0) { - free_pages((unsigned long)sring, get_order(ring_size)); + free_pages_exact(sring, ring_size); rinfo->ring.sring = NULL; goto fail; } @@ -2719,11 +2731,10 @@ static void purge_persistent_grants(struct blkfront_info *info) list_for_each_entry_safe(gnt_list_entry, tmp, &rinfo->grants, node) { if (gnt_list_entry->gref == GRANT_INVALID_REF || - gnttab_query_foreign_access(gnt_list_entry->gref)) + !gnttab_try_end_foreign_access(gnt_list_entry->gref)) continue; list_del(&gnt_list_entry->node); - gnttab_end_foreign_access(gnt_list_entry->gref, 0, 0UL); rinfo->persistent_gnts_c--; gnt_list_entry->gref = GRANT_INVALID_REF; list_add_tail(&gnt_list_entry->node, &rinfo->grants); -- Gitee From e63e775c3c3f738ff9aa055519e6ccea6ed46583 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:12 +0800 Subject: [PATCH 208/340] xen/netfront: don't use gnttab_query_foreign_access() for mapped status stable inclusion from linux-4.19.234 commit 927e4eb8ddf4968b6a33be992b28063f84552c72 -------------------------------- Commit 31185df7e2b1d2fa1de4900247a12d7b9c7087eb upstream. It isn't enough to check whether a grant is still being in use by calling gnttab_query_foreign_access(), as a mapping could be realized by the other side just after having called that function. In case the call was done in preparation of revoking a grant it is better to do so via gnttab_end_foreign_access_ref() and check the success of that operation instead. 
This is CVE-2022-23037 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/net/xen-netfront.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index da1b93f98cd1..2c879f07d3f2 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -412,14 +412,12 @@ static bool xennet_tx_buf_gc(struct netfront_queue *queue) queue->tx_link[id] = TX_LINK_NONE; skb = queue->tx_skbs[id]; queue->tx_skbs[id] = NULL; - if (unlikely(gnttab_query_foreign_access( - queue->grant_tx_ref[id]) != 0)) { + if (unlikely(!gnttab_end_foreign_access_ref( + queue->grant_tx_ref[id], GNTMAP_readonly))) { dev_alert(dev, "Grant still in use by backend domain\n"); goto err; } - gnttab_end_foreign_access_ref( - queue->grant_tx_ref[id], GNTMAP_readonly); gnttab_release_grant_reference( &queue->gref_tx_head, queue->grant_tx_ref[id]); queue->grant_tx_ref[id] = GRANT_INVALID_REF; -- Gitee From 4ba1e7c05179c9554e9e3e328163fe7ec375c8d0 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:13 +0800 Subject: [PATCH 209/340] xen/scsifront: don't use gnttab_query_foreign_access() for mapped status stable inclusion from linux-4.19.234 commit 62a696c15cfcfd32527f81ca3d94f2bde57475dc -------------------------------- Commit 33172ab50a53578a95691310f49567c9266968b0 upstream. It isn't enough to check whether a grant is still being in use by calling gnttab_query_foreign_access(), as a mapping could be realized by the other side just after having called that function. In case the call was done in preparation of revoking a grant it is better to do so via gnttab_try_end_foreign_access() and check the success of that operation instead. This is CVE-2022-23038 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/scsi/xen-scsifront.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/scsi/xen-scsifront.c b/drivers/scsi/xen-scsifront.c index 61389bdc7926..a1d822ae9fca 100644 --- a/drivers/scsi/xen-scsifront.c +++ b/drivers/scsi/xen-scsifront.c @@ -233,12 +233,11 @@ static void scsifront_gnttab_done(struct vscsifrnt_info *info, return; for (i = 0; i < shadow->nr_grants; i++) { - if (unlikely(gnttab_query_foreign_access(shadow->gref[i]))) { + if (unlikely(!gnttab_try_end_foreign_access(shadow->gref[i]))) { shost_printk(KERN_ALERT, info->host, KBUILD_MODNAME "grant still in use by backend\n"); BUG(); } - gnttab_end_foreign_access(shadow->gref[i], 0, 0UL); } kfree(shadow->sg); -- Gitee From f0006464201fe2521cb18fcbc04b7fad458ea27f Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:14 +0800 Subject: [PATCH 210/340] xen/gntalloc: don't use gnttab_query_foreign_access() stable inclusion from linux-4.19.234 commit fbc57368ea527dcfa909908fc47a851a56e4e5ce -------------------------------- Commit d3b6372c5881cb54925212abb62c521df8ba4809 upstream. Using gnttab_query_foreign_access() is unsafe, as it is racy by design. The use case in the gntalloc driver is not needed at all. 
While at it replace the call of gnttab_end_foreign_access_ref() with a call of gnttab_end_foreign_access(), which is what is really wanted there. In case the grant wasn't used due to an allocation failure, just free the grant via gnttab_free_grant_reference(). This is CVE-2022-23039 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/xen/gntalloc.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/drivers/xen/gntalloc.c b/drivers/xen/gntalloc.c index 3fa40c723e8e..edb0acd0b832 100644 --- a/drivers/xen/gntalloc.c +++ b/drivers/xen/gntalloc.c @@ -169,20 +169,14 @@ static int add_grefs(struct ioctl_gntalloc_alloc_gref *op, __del_gref(gref); } - /* It's possible for the target domain to map the just-allocated grant - * references by blindly guessing their IDs; if this is done, then - * __del_gref will leave them in the queue_gref list. They need to be - * added to the global list so that we can free them when they are no - * longer referenced. - */ - if (unlikely(!list_empty(&queue_gref))) - list_splice_tail(&queue_gref, &gref_list); mutex_unlock(&gref_mutex); return rc; } static void __del_gref(struct gntalloc_gref *gref) { + unsigned long addr; + if (gref->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) { uint8_t *tmp = kmap(gref->page); tmp[gref->notify.pgoff] = 0; @@ -196,21 +190,16 @@ static void __del_gref(struct gntalloc_gref *gref) gref->notify.flags = 0; if (gref->gref_id) { - if (gnttab_query_foreign_access(gref->gref_id)) - return; - - if (!gnttab_end_foreign_access_ref(gref->gref_id, 0)) - return; - - gnttab_free_grant_reference(gref->gref_id); + if (gref->page) { + addr = (unsigned long)page_to_virt(gref->page); + gnttab_end_foreign_access(gref->gref_id, 0, addr); + } else + gnttab_free_grant_reference(gref->gref_id); } gref_size--; list_del(&gref->next_gref); - if (gref->page) - __free_page(gref->page); - kfree(gref); } -- Gitee From c16dd2ae5e63d66498b27028b46bec3c99960a83 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:15 +0800 Subject: [PATCH 211/340] xen: remove gnttab_query_foreign_access() stable inclusion from linux-4.19.234 commit c900f34fc134cc75de431e16546f37bf7804a012 -------------------------------- Commit 1dbd11ca75fe664d3e54607547771d021f531f59 upstream. Remove gnttab_query_foreign_access(), as it is unused and unsafe to use. All previous use cases assumed a grant would not be in use after gnttab_query_foreign_access() returned 0. This information is useless in best case, as it only refers to a situation in the past, which could have changed already. Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/xen/grant-table.c | 25 ------------------------- include/xen/grant_table.h | 2 -- 2 files changed, 27 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 9524806eb4b9..1e785e65249a 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -134,13 +134,6 @@ struct gnttab_ops { * return the frame. */ unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref); - /* - * Query the status of a grant entry. Ref parameter is reference of - * queried grant entry, return value is the status of queried entry. 
- * Detailed status(writing/reading) can be gotten from the return value - * by bit operations. - */ - int (*query_foreign_access)(grant_ref_t ref); }; struct unmap_refs_callback_data { @@ -285,22 +278,6 @@ int gnttab_grant_foreign_access(domid_t domid, unsigned long frame, } EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access); -static int gnttab_query_foreign_access_v1(grant_ref_t ref) -{ - return gnttab_shared.v1[ref].flags & (GTF_reading|GTF_writing); -} - -static int gnttab_query_foreign_access_v2(grant_ref_t ref) -{ - return grstatus[ref] & (GTF_reading|GTF_writing); -} - -int gnttab_query_foreign_access(grant_ref_t ref) -{ - return gnttab_interface->query_foreign_access(ref); -} -EXPORT_SYMBOL_GPL(gnttab_query_foreign_access); - static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref, int readonly) { u16 flags, nflags; @@ -1307,7 +1284,6 @@ static const struct gnttab_ops gnttab_v1_ops = { .update_entry = gnttab_update_entry_v1, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1, - .query_foreign_access = gnttab_query_foreign_access_v1, }; static const struct gnttab_ops gnttab_v2_ops = { @@ -1319,7 +1295,6 @@ static const struct gnttab_ops gnttab_v2_ops = { .update_entry = gnttab_update_entry_v2, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, - .query_foreign_access = gnttab_query_foreign_access_v2, }; static bool gnttab_need_v2(void) diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 7628ab25f686..7087c9fea80f 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -118,8 +118,6 @@ int gnttab_grant_foreign_transfer(domid_t domid, unsigned long pfn); unsigned long gnttab_end_foreign_transfer_ref(grant_ref_t ref); unsigned long gnttab_end_foreign_transfer(grant_ref_t ref); -int gnttab_query_foreign_access(grant_ref_t ref); - /* * operations on reserved batches of grant references */ -- Gitee From b0e5fec4687913b851c32f461d335e3c2130f39c Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:16 +0800 Subject: [PATCH 212/340] xen/9p: use alloc/free_pages_exact() stable inclusion from linux-4.19.234 commit 2466bed361f3274e3e0ca9d8e539532481c06fea -------------------------------- Commit 5cadd4bb1d7fc9ab201ac14620d1a478357e4ebd upstream. Instead of __get_free_pages() and free_pages() use alloc_pages_exact() and free_pages_exact(). This is in preparation of a change of gnttab_end_foreign_access() which will prohibit use of high-order pages. By using the local variable "order" instead of ring->intf->ring_order in the error path of xen_9pfs_front_alloc_dataring() another bug is fixed, as the error path can be entered before ring->intf->ring_order is being set. By using alloc_pages_exact() the size in bytes is specified for the allocation, which fixes another bug for the case of order < (PAGE_SHIFT - XEN_PAGE_SHIFT). This is part of CVE-2022-23041 / XSA-396. 
Reported-by: Simon Gaiser Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- net/9p/trans_xen.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c index 9daab0dd833b..81002cba88b3 100644 --- a/net/9p/trans_xen.c +++ b/net/9p/trans_xen.c @@ -301,9 +301,9 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv) ref = priv->rings[i].intf->ref[j]; gnttab_end_foreign_access(ref, 0, 0); } - free_pages((unsigned long)priv->rings[i].data.in, - XEN_9PFS_RING_ORDER - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(priv->rings[i].data.in, + 1UL << (XEN_9PFS_RING_ORDER + + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(priv->rings[i].ref, 0, 0); free_page((unsigned long)priv->rings[i].intf); @@ -341,8 +341,8 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (ret < 0) goto out; ring->ref = ret; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - XEN_9PFS_RING_ORDER - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + bytes = alloc_pages_exact(1UL << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT), + GFP_KERNEL | __GFP_ZERO); if (!bytes) { ret = -ENOMEM; goto out; @@ -373,9 +373,7 @@ static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev, if (bytes) { for (i--; i >= 0; i--) gnttab_end_foreign_access(ring->intf->ref[i], 0, 0); - free_pages((unsigned long)bytes, - XEN_9PFS_RING_ORDER - - (PAGE_SHIFT - XEN_PAGE_SHIFT)); + free_pages_exact(bytes, 1UL << (XEN_9PFS_RING_ORDER + XEN_PAGE_SHIFT)); } gnttab_end_foreign_access(ring->ref, 0, 0); free_page((unsigned long)ring->intf); -- Gitee From 6b310e722622a233becab4088711e57fb33a992a Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:17 +0800 Subject: [PATCH 213/340] xen/pvcalls: use alloc/free_pages_exact() stable inclusion from linux-4.19.234 commit f85d03f0f482cc28a2ee15a1fed2ae57ae359412 -------------------------------- Commit b0576cc9c6b843d99c6982888d59a56209341888 upstream. Instead of __get_free_pages() and free_pages() use alloc_pages_exact() and free_pages_exact(). This is in preparation of a change of gnttab_end_foreign_access() which will prohibit use of high-order pages. This is part of CVE-2022-23041 / XSA-396. 
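The allocation change shared by the 9p and pvcalls patches, reduced to a sketch; 'ring_order' is a stand-in for the respective frontend's ring order macro, and the surrounding function is assumed.

  /* size the ring in bytes and use the *_exact() helpers, so that no
   * high-order allocation is ever handed to gnttab_end_foreign_access() */
  size_t bytes = PAGE_SIZE << ring_order;
  void *ring = alloc_pages_exact(bytes, GFP_KERNEL | __GFP_ZERO);

  if (!ring)
          return -ENOMEM;

  /* ... grant each XEN_PAGE_SIZE chunk of 'ring' individually ... */

  /* teardown: free with the same byte count, not with a page order */
  free_pages_exact(ring, bytes);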
Reported-by: Simon Gaiser Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/xen/pvcalls-front.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/xen/pvcalls-front.c b/drivers/xen/pvcalls-front.c index d7438fdc5706..285bb8390a97 100644 --- a/drivers/xen/pvcalls-front.c +++ b/drivers/xen/pvcalls-front.c @@ -346,8 +346,8 @@ static void free_active_ring(struct sock_mapping *map) if (!map->active.ring) return; - free_pages((unsigned long)map->active.data.in, - map->active.ring->ring_order); + free_pages_exact(map->active.data.in, + PAGE_SIZE << map->active.ring->ring_order); free_page((unsigned long)map->active.ring); } @@ -361,8 +361,8 @@ static int alloc_active_ring(struct sock_mapping *map) goto out; map->active.ring->ring_order = PVCALLS_RING_ORDER; - bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - PVCALLS_RING_ORDER); + bytes = alloc_pages_exact(PAGE_SIZE << PVCALLS_RING_ORDER, + GFP_KERNEL | __GFP_ZERO); if (!bytes) goto out; -- Gitee From ca1d0828717a0994f4ef1e5fd00ab4514f963c81 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:18 +0800 Subject: [PATCH 214/340] xen/gnttab: fix gnttab_end_foreign_access() without page specified stable inclusion from linux-4.19.234 commit 92dc0e4a219602242407dedd987dc9c8263c959b -------------------------------- Commit 42baefac638f06314298087394b982ead9ec444b upstream. gnttab_end_foreign_access() is used to free a grant reference and optionally to free the associated page. In case the grant is still in use by the other side processing is being deferred. This leads to a problem in case no page to be freed is specified by the caller: the caller doesn't know that the page is still mapped by the other side and thus should not be used for other purposes. The correct way to handle this situation is to take an additional reference to the granted page in case handling is being deferred and to drop that reference when the grant reference could be freed finally. This requires that there are no users of gnttab_end_foreign_access() left directly repurposing the granted page after the call, as this might result in clobbered data or information leaks via the not yet freed grant reference. This is part of CVE-2022-23041 / XSA-396. Reported-by: Simon Gaiser Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/xen/grant-table.c | 36 +++++++++++++++++++++++++++++------- include/xen/grant_table.h | 7 ++++++- 2 files changed, 35 insertions(+), 8 deletions(-) diff --git a/drivers/xen/grant-table.c b/drivers/xen/grant-table.c index 1e785e65249a..e434a583ee7e 100644 --- a/drivers/xen/grant-table.c +++ b/drivers/xen/grant-table.c @@ -134,6 +134,10 @@ struct gnttab_ops { * return the frame. */ unsigned long (*end_foreign_transfer_ref)(grant_ref_t ref); + /* + * Read the frame number related to a given grant reference. 
+ */ + unsigned long (*read_frame)(grant_ref_t ref); }; struct unmap_refs_callback_data { @@ -331,6 +335,16 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly) } EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref); +static unsigned long gnttab_read_frame_v1(grant_ref_t ref) +{ + return gnttab_shared.v1[ref].frame; +} + +static unsigned long gnttab_read_frame_v2(grant_ref_t ref) +{ + return gnttab_shared.v2[ref].full_page.frame; +} + struct deferred_entry { struct list_head list; grant_ref_t ref; @@ -360,12 +374,9 @@ static void gnttab_handle_deferred(struct timer_list *unused) spin_unlock_irqrestore(&gnttab_list_lock, flags); if (_gnttab_end_foreign_access_ref(entry->ref, entry->ro)) { put_free_entry(entry->ref); - if (entry->page) { - pr_debug("freeing g.e. %#x (pfn %#lx)\n", - entry->ref, page_to_pfn(entry->page)); - put_page(entry->page); - } else - pr_info("freeing g.e. %#x\n", entry->ref); + pr_debug("freeing g.e. %#x (pfn %#lx)\n", + entry->ref, page_to_pfn(entry->page)); + put_page(entry->page); kfree(entry); entry = NULL; } else { @@ -390,9 +401,18 @@ static void gnttab_handle_deferred(struct timer_list *unused) static void gnttab_add_deferred(grant_ref_t ref, bool readonly, struct page *page) { - struct deferred_entry *entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + struct deferred_entry *entry; + gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL; const char *what = KERN_WARNING "leaking"; + entry = kmalloc(sizeof(*entry), gfp); + if (!page) { + unsigned long gfn = gnttab_interface->read_frame(ref); + + page = pfn_to_page(gfn_to_pfn(gfn)); + get_page(page); + } + if (entry) { unsigned long flags; @@ -1284,6 +1304,7 @@ static const struct gnttab_ops gnttab_v1_ops = { .update_entry = gnttab_update_entry_v1, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v1, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v1, + .read_frame = gnttab_read_frame_v1, }; static const struct gnttab_ops gnttab_v2_ops = { @@ -1295,6 +1316,7 @@ static const struct gnttab_ops gnttab_v2_ops = { .update_entry = gnttab_update_entry_v2, .end_foreign_access_ref = gnttab_end_foreign_access_ref_v2, .end_foreign_transfer_ref = gnttab_end_foreign_transfer_ref_v2, + .read_frame = gnttab_read_frame_v2, }; static bool gnttab_need_v2(void) diff --git a/include/xen/grant_table.h b/include/xen/grant_table.h index 7087c9fea80f..a58a89cc0e97 100644 --- a/include/xen/grant_table.h +++ b/include/xen/grant_table.h @@ -100,7 +100,12 @@ int gnttab_end_foreign_access_ref(grant_ref_t ref, int readonly); * Note that the granted page might still be accessed (read or write) by the * other side after gnttab_end_foreign_access() returns, so even if page was * specified as 0 it is not allowed to just reuse the page for other - * purposes immediately. + * purposes immediately. gnttab_end_foreign_access() will take an additional + * reference to the granted page in this case, which is dropped only after + * the grant is no longer in use. + * This requires that multi page allocations for areas subject to + * gnttab_end_foreign_access() are done via alloc_pages_exact() (and freeing + * via free_pages_exact()) in order to avoid high order pages. 
*/ void gnttab_end_foreign_access(grant_ref_t ref, int readonly, unsigned long page); -- Gitee From 5dc640838ff5d519be706b06b977fa7c66ddf264 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 15 Apr 2022 14:44:19 +0800 Subject: [PATCH 215/340] xen/netfront: react properly to failing gnttab_end_foreign_access_ref() stable inclusion from linux-4.19.234 commit c307029d811e03546d18d0e512fe295b3103b8e5 -------------------------------- Commit 66e3531b33ee51dad17c463b4d9c9f52e341503d upstream. When calling gnttab_end_foreign_access_ref() the returned value must be tested and the reaction to that value should be appropriate. In case of failure in xennet_get_responses() the reaction should not be to crash the system, but to disable the network device. The calls in setup_netfront() can be replaced by calls of gnttab_end_foreign_access(). While at it avoid double free of ring pages and grant references via xennet_disconnect_backend() in this case. This is CVE-2022-23042 / part of XSA-396. Reported-by: Demi Marie Obenour Signed-off-by: Juergen Gross Reviewed-by: Jan Beulich Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Xiu Jianfeng Reviewed-by: Jason Yan Signed-off-by: Laibin Qiu --- drivers/net/xen-netfront.c | 48 ++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 2c879f07d3f2..376024c26270 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -838,7 +838,6 @@ static int xennet_get_responses(struct netfront_queue *queue, int max = XEN_NETIF_NR_SLOTS_MIN + (rx->status <= RX_COPY_THRESHOLD); int slots = 1; int err = 0; - unsigned long ret; if (rx->flags & XEN_NETRXF_extra_info) { err = xennet_get_extras(queue, extras, rp); @@ -869,8 +868,13 @@ static int xennet_get_responses(struct netfront_queue *queue, goto next; } - ret = gnttab_end_foreign_access_ref(ref, 0); - BUG_ON(!ret); + if (!gnttab_end_foreign_access_ref(ref, 0)) { + dev_alert(dev, + "Grant still in use by backend domain\n"); + queue->info->broken = true; + dev_alert(dev, "Disabled for further use\n"); + return -EINVAL; + } gnttab_release_grant_reference(&queue->gref_rx_head, ref); @@ -1074,6 +1078,10 @@ static int xennet_poll(struct napi_struct *napi, int budget) err = xennet_get_responses(queue, &rinfo, rp, &tmpq); if (unlikely(err)) { + if (queue->info->broken) { + spin_unlock(&queue->rx_lock); + return 0; + } err: while ((skb = __skb_dequeue(&tmpq))) __skb_queue_tail(&errq, skb); @@ -1644,7 +1652,7 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_queue *queue, unsigned int feature_split_evtchn) { struct xen_netif_tx_sring *txs; - struct xen_netif_rx_sring *rxs; + struct xen_netif_rx_sring *rxs = NULL; grant_ref_t gref; int err; @@ -1664,21 +1672,21 @@ static int setup_netfront(struct xenbus_device *dev, err = xenbus_grant_ring(dev, txs, 1, &gref); if (err < 0) - goto grant_tx_ring_fail; + goto fail; queue->tx_ring_ref = gref; rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); if (!rxs) { err = -ENOMEM; xenbus_dev_fatal(dev, err, "allocating rx ring page"); - goto alloc_rx_ring_fail; + goto fail; } SHARED_RING_INIT(rxs); FRONT_RING_INIT(&queue->rx, rxs, XEN_PAGE_SIZE); err = xenbus_grant_ring(dev, rxs, 1, &gref); if (err < 0) - goto grant_rx_ring_fail; + goto fail; queue->rx_ring_ref = gref; if (feature_split_evtchn) @@ -1691,22 +1699,28 @@ static int setup_netfront(struct xenbus_device *dev, err = 
setup_netfront_single(queue); if (err) - goto alloc_evtchn_fail; + goto fail; return 0; /* If we fail to setup netfront, it is safe to just revoke access to * granted pages because backend is not accessing it at this point. */ -alloc_evtchn_fail: - gnttab_end_foreign_access_ref(queue->rx_ring_ref, 0); -grant_rx_ring_fail: - free_page((unsigned long)rxs); -alloc_rx_ring_fail: - gnttab_end_foreign_access_ref(queue->tx_ring_ref, 0); -grant_tx_ring_fail: - free_page((unsigned long)txs); -fail: + fail: + if (queue->rx_ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(queue->rx_ring_ref, 0, + (unsigned long)rxs); + queue->rx_ring_ref = GRANT_INVALID_REF; + } else { + free_page((unsigned long)rxs); + } + if (queue->tx_ring_ref != GRANT_INVALID_REF) { + gnttab_end_foreign_access(queue->tx_ring_ref, 0, + (unsigned long)txs); + queue->tx_ring_ref = GRANT_INVALID_REF; + } else { + free_page((unsigned long)txs); + } return err; } -- Gitee From dd9f1295656568a2d1afe89a375c59afa79cb186 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Sat, 16 Apr 2022 07:27:52 +0000 Subject: [PATCH 216/340] net: core: netlink: add helper refcount dec and lock function stable inclusion from linux-4.19.221 commit cd25f1099284a0cbe916344fc1e6c1ffed6c5306 category: bugfix CVE: CVE-2021-39713 -------------------------------- [ Upstream commit 6f99528e9797794b91b43321fbbc93fe772b0803 ] Rtnl lock is encapsulated in netlink and cannot be accessed by other modules directly. This means that reference counted objects that rely on rtnl lock cannot use it with refcounter helper function that atomically releases decrements reference and obtains mutex. This patch implements simple wrapper function around refcount_dec_and_lock that obtains rtnl lock if reference counter value reached 0. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a568 Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: Zhengchao Shao Reviewed-by: Xiu Jianfeng Reviewed-by: Wei Yongjun Signed-off-by: Yongqiang Liu --- include/linux/rtnetlink.h | 2 ++ net/core/rtnetlink.c | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 5225832bd6ff..9cdd76348d9a 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -6,6 +6,7 @@ #include #include #include +#include #include extern int rtnetlink_send(struct sk_buff *skb, struct net *net, u32 pid, u32 group, int echo); @@ -34,6 +35,7 @@ extern void rtnl_unlock(void); extern int rtnl_trylock(void); extern int rtnl_is_locked(void); extern int rtnl_lock_killable(void); +extern bool refcount_dec_and_rtnl_lock(refcount_t *r); extern wait_queue_head_t netdev_unregistering_wq; extern struct rw_semaphore pernet_ops_rwsem; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 6cd0dbe42baa..2837cc03f69e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -130,6 +130,12 @@ int rtnl_is_locked(void) } EXPORT_SYMBOL(rtnl_is_locked); +bool refcount_dec_and_rtnl_lock(refcount_t *r) +{ + return refcount_dec_and_mutex_lock(r, &rtnl_mutex); +} +EXPORT_SYMBOL(refcount_dec_and_rtnl_lock); + #ifdef CONFIG_PROVE_LOCKING bool lockdep_rtnl_is_held(void) { -- Gitee From 5a1704c7f3c1f3499716b7f440efa002f6724a35 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Sat, 16 Apr 2022 07:27:53 +0000 Subject: [PATCH 217/340] net: sched: extend Qdisc with rcu stable inclusion from linux-4.19.221 commit f602ed9f8574512e7ea1ab65c3db7ba71053bf27 category: bugfix CVE: CVE-2021-39713 -------------------------------- [ Upstream commit 3a7d0d07a386716b459b00783b11a8211cefcc0f ] Currently, Qdisc API functions assume that users have rtnl lock taken. To implement rtnl unlocked classifiers update interface, Qdisc API must be extended with functions that do not require rtnl lock. Extend Qdisc structure with rcu. Implement special version of put function qdisc_put_unlocked() that is called without rtnl lock taken. This function only takes rtnl lock if Qdisc reference counter reached zero and is intended to be used as optimization. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a568 Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: Zhengchao Shao Conflict: net/sched/sch_generic.c Reviewed-by: Xiu Jianfeng Reviewed-by: Wei Yongjun Signed-off-by: Yongqiang Liu --- include/linux/rtnetlink.h | 5 +++++ include/net/pkt_sched.h | 1 + include/net/sch_generic.h | 1 + net/sched/sch_api.c | 18 ++++++++++++++++++ net/sched/sch_generic.c | 9 ++++++++- 5 files changed, 33 insertions(+), 1 deletion(-) diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 9cdd76348d9a..bb9cb84114c1 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -85,6 +85,11 @@ static inline struct netdev_queue *dev_ingress_queue(struct net_device *dev) return rtnl_dereference(dev->ingress_queue); } +static inline struct netdev_queue *dev_ingress_queue_rcu(struct net_device *dev) +{ + return rcu_dereference(dev->ingress_queue); +} + struct netdev_queue *dev_ingress_queue_create(struct net_device *dev); #ifdef CONFIG_NET_INGRESS diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index edca90ef3bdc..1a6ac924266d 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -103,6 +103,7 @@ int qdisc_set_default(const char *id); void qdisc_hash_add(struct Qdisc *q, bool invisible); void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); +struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab, struct netlink_ext_ack *extack); diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index c9cd5086bd54..7ac17721c41a 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -108,6 +108,7 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; + struct rcu_head rcu; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2c9aa550763d..19c390ab2b7c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -315,6 +315,24 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) return q; } +struct Qdisc *qdisc_lookup_rcu(struct net_device *dev, u32 handle) +{ + struct netdev_queue *nq; + struct Qdisc *q; + + if (!handle) + return NULL; + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; + + nq = dev_ingress_queue_rcu(dev); + if (nq) + q = qdisc_match_from_root(nq->qdisc_sleeping, handle); +out: + return q; +} + static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) { unsigned long cl; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index c2276a3c9dd8..ef41320a9e96 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -953,6 +953,13 @@ void qdisc_free(struct Qdisc *qdisc) kfree((char *) qdisc - qdisc->padded); } +void qdisc_free_cb(struct rcu_head *head) +{ + struct Qdisc *q = container_of(head, struct Qdisc, rcu); + + qdisc_free(q); +} + void qdisc_destroy(struct Qdisc *qdisc) { const struct Qdisc_ops *ops; @@ -990,7 +997,7 @@ void qdisc_destroy(struct Qdisc *qdisc) kfree_skb_list(skb); } - qdisc_free(qdisc); + call_rcu(&qdisc->rcu, qdisc_free_cb); } EXPORT_SYMBOL(qdisc_destroy); -- Gitee From 823b2cfc16d38dc146e599677f71ec86f5fff876 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Sat, 16 Apr 2022 07:27:54 +0000 Subject: [PATCH 
218/340] net: sched: add helper function to take reference to Qdisc stable inclusion from linux-4.19.221 commit da1d324088c40fa0a382224c466175fc5c704106 category: bugfix CVE: CVE-2021-39713 -------------------------------- [ Upstream commit 9d7e82cec35c027756ec97e274f878251f271181 ] Implement function to take reference to Qdisc that relies on rcu read lock instead of rtnl mutex. Function only takes reference to Qdisc if reference counter isn't zero. Intended to be used by unlocked cls API. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. Miller [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a568 Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: Zhengchao Shao Reviewed-by: Xiu Jianfeng Reviewed-by: Wei Yongjun Signed-off-by: Yongqiang Liu --- include/net/sch_generic.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 7ac17721c41a..b86acf3d1ea6 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -118,6 +118,19 @@ static inline void qdisc_refcount_inc(struct Qdisc *qdisc) refcount_inc(&qdisc->refcnt); } +/* Intended to be used by unlocked users, when concurrent qdisc release is + * possible. + */ + +static inline struct Qdisc *qdisc_refcount_inc_nz(struct Qdisc *qdisc) +{ + if (qdisc->flags & TCQ_F_BUILTIN) + return qdisc; + if (refcount_inc_not_zero(&qdisc->refcnt)) + return qdisc; + return NULL; +} + static inline bool qdisc_is_running(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) -- Gitee From bad618de2fea9395c3e6a04f007bc88f801fc52b Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Sat, 16 Apr 2022 07:27:55 +0000 Subject: [PATCH 219/340] net: sched: use Qdisc rcu API instead of relying on rtnl lock stable inclusion from linux-4.19.221 commit ae214e04b95ff64a4b0e9aab6742520bfde6ff0c category: bugfix CVE: CVE-2021-39713 -------------------------------- [ Upstream commit e368fdb61d8e7c67ac70791b23345b26d7bbc661 ] As a preparation from removing rtnl lock dependency from rules update path, use Qdisc rcu and reference counting capabilities instead of relying on rtnl lock while working with Qdiscs. Create new tcf_block_release() function, and use it to free resources taken by tcf_block_find(). Currently, this function only releases Qdisc and it is extended in next patches in this series. Signed-off-by: Vlad Buslov Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller [Lee: Sent to Stable] Link: https://syzkaller.appspot.com/bug?id=d7e411c5472dd5da33d8cc921ccadc747743a568 Reported-by: syzbot+5f229e48cccc804062c0@syzkaller.appspotmail.com Signed-off-by: Lee Jones Signed-off-by: Greg Kroah-Hartman Signed-off-by: Zhengchao Shao Conflict: net/sched/cls_api.c Reviewed-by: Xiu Jianfeng Reviewed-by: Wei Yongjun Signed-off-by: Yongqiang Liu --- net/sched/cls_api.c | 79 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 15 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 4413aa8d4e82..ff05f08010c0 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -539,6 +539,7 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, struct netlink_ext_ack *extack) { struct tcf_block *block; + int err = 0; if (ifindex == TCM_IFINDEX_MAGIC_BLOCK) { block = tcf_block_lookup(net, block_index); @@ -550,55 +551,93 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, const struct Qdisc_class_ops *cops; struct net_device *dev; + rcu_read_lock(); + /* Find link */ - dev = __dev_get_by_index(net, ifindex); - if (!dev) + dev = dev_get_by_index_rcu(net, ifindex); + if (!dev) { + rcu_read_unlock(); return ERR_PTR(-ENODEV); + } /* Find qdisc */ if (!*parent) { *q = dev->qdisc; *parent = (*q)->handle; } else { - *q = qdisc_lookup(dev, TC_H_MAJ(*parent)); + *q = qdisc_lookup_rcu(dev, TC_H_MAJ(*parent)); if (!*q) { NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto errout_rcu; } } + *q = qdisc_refcount_inc_nz(*q); + if (!*q) { + NL_SET_ERR_MSG(extack, "Parent Qdisc doesn't exists"); + err = -EINVAL; + goto errout_rcu; + } + /* Is it classful? */ cops = (*q)->ops->cl_ops; if (!cops) { NL_SET_ERR_MSG(extack, "Qdisc not classful"); - return ERR_PTR(-EINVAL); + err = -EINVAL; + goto errout_rcu; } if (!cops->tcf_block) { NL_SET_ERR_MSG(extack, "Class doesn't support blocks"); - return ERR_PTR(-EOPNOTSUPP); + err = -EOPNOTSUPP; + goto errout_rcu; } + /* At this point we know that qdisc is not noop_qdisc, + * which means that qdisc holds a reference to net_device + * and we hold a reference to qdisc, so it is safe to release + * rcu read lock. + */ + rcu_read_unlock(); + /* Do we search for filter, attached to class? */ if (TC_H_MIN(*parent)) { *cl = cops->find(*q, *parent); if (*cl == 0) { NL_SET_ERR_MSG(extack, "Specified class doesn't exist"); - return ERR_PTR(-ENOENT); + err = -ENOENT; + goto errout_qdisc; } } /* And the last stroke */ block = cops->tcf_block(*q, *cl, extack); - if (!block) - return ERR_PTR(-EINVAL); + if (!block) { + err = -EINVAL; + goto errout_qdisc; + } if (tcf_block_shared(block)) { NL_SET_ERR_MSG(extack, "This filter block is shared. Please use the block index to manipulate the filters"); - return ERR_PTR(-EOPNOTSUPP); + err = -EOPNOTSUPP; + goto errout_qdisc; } } return block; + +errout_rcu: + rcu_read_unlock(); +errout_qdisc: + if (*q) + qdisc_destroy(*q); + return ERR_PTR(err); +} + +static void tcf_block_release(struct Qdisc *q, struct tcf_block *block) +{ + if (q) + qdisc_destroy(q); } struct tcf_block_owner_item { @@ -1336,6 +1375,7 @@ static int tc_new_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. 
*/ goto replay; @@ -1457,6 +1497,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); return err; } @@ -1542,6 +1583,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n, errout: if (chain) tcf_chain_put(chain); + tcf_block_release(q, block); return err; } @@ -1858,7 +1900,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; if (chain_index > TC_ACT_EXT_VAL_MASK) { NL_SET_ERR_MSG(extack, "Specified chain index exceeds upper limit"); - return -EINVAL; + err = -EINVAL; + goto errout_block; } chain = tcf_chain_lookup(block, chain_index); if (n->nlmsg_type == RTM_NEWCHAIN) { @@ -1870,23 +1913,27 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, tcf_chain_hold(chain); } else { NL_SET_ERR_MSG(extack, "Filter chain already exists"); - return -EEXIST; + err = -EEXIST; + goto errout_block; } } else { if (!(n->nlmsg_flags & NLM_F_CREATE)) { NL_SET_ERR_MSG(extack, "Need both RTM_NEWCHAIN and NLM_F_CREATE to create a new chain"); - return -ENOENT; + err = -ENOENT; + goto errout_block; } chain = tcf_chain_create(block, chain_index); if (!chain) { NL_SET_ERR_MSG(extack, "Failed to create filter chain"); - return -ENOMEM; + err = -ENOMEM; + goto errout_block; } } } else { if (!chain || tcf_chain_held_by_acts_only(chain)) { NL_SET_ERR_MSG(extack, "Cannot find specified filter chain"); - return -EINVAL; + err = -EINVAL; + goto errout_block; } tcf_chain_hold(chain); } @@ -1930,6 +1977,8 @@ static int tc_ctl_chain(struct sk_buff *skb, struct nlmsghdr *n, errout: tcf_chain_put(chain); +errout_block: + tcf_block_release(q, block); if (err == -EAGAIN) /* Replay the request. */ goto replay; -- Gitee From 90aafa084597f886533445325239fc84b762391b Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Sat, 16 Apr 2022 07:27:56 +0000 Subject: [PATCH 220/340] net_sched: fix a crash in tc_new_tfilter() stable inclusion from linux-4.19.221 commit f9ff09e266ca70c801b9911280f6ae64c9183d85 category: bugfix CVE: CVE-2021-39713 -------------------------------- commit 460b360104d51552a57f39e54b2589c9fd7fa0b3 upstream. When tcf_block_find() fails, it already rollbacks the qdisc refcnt, so its caller doesn't need to clean up this again. Avoid calling qdisc_put() again by resetting qdisc to NULL for callers. Reported-by: syzbot+37b8770e6d5a8220a039@syzkaller.appspotmail.com Fixes: e368fdb61d8e ("net: sched: use Qdisc rcu API instead of relying on rtnl lock") Signed-off-by: Cong Wang Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Zhengchao Shao Conflict: net/sched/cls_api.c Reviewed-by: Xiu Jianfeng Reviewed-by: Wei Yongjun Signed-off-by: Yongqiang Liu --- net/sched/cls_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index ff05f08010c0..3ca18e3a2be9 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -629,8 +629,10 @@ static struct tcf_block *tcf_block_find(struct net *net, struct Qdisc **q, errout_rcu: rcu_read_unlock(); errout_qdisc: - if (*q) + if (*q) { qdisc_destroy(*q); + *q = NULL; + } return ERR_PTR(err); } -- Gitee From fe484adc98d528c23eed20fd03539b1ee3717547 Mon Sep 17 00:00:00 2001 From: Zhengchao Shao Date: Sat, 16 Apr 2022 07:27:57 +0000 Subject: [PATCH 221/340] net: sched: adapt Qdisc kabi hulk inclusion category: bugfix CVE: CVE-2021-39713 -------------------------------- To adapt to KABI, put rcu before gso_skb for 64-bit kernel. RCU will use 16 Bytes, and the space is enough. It's unuse for 32-bit kernel. Signed-off-by: Zhengchao Shao Reviewed-by: Xiu Jianfeng Reviewed-by: Wei Yongjun Signed-off-by: Yongqiang Liu --- include/net/sch_generic.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index b86acf3d1ea6..f5f45390828d 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -93,7 +93,13 @@ struct Qdisc { struct gnet_stats_queue __percpu *cpu_qstats; int padded; refcount_t refcnt; - +#ifndef __GENKSYMS__ + /* To adapt to KABI, put rcu before gso_skb for 64-bit kernel. + * RCU will use 16 Bytes, and the space is enough. It's unuse + * for 32-bit kernel. + */ + struct rcu_head rcu; +#endif /* * For performance sake on SMP, we put highly modified fields at the end */ @@ -108,7 +114,6 @@ struct Qdisc { spinlock_t busylock ____cacheline_aligned_in_smp; spinlock_t seqlock; - struct rcu_head rcu; }; static inline void qdisc_refcount_inc(struct Qdisc *qdisc) -- Gitee From d138b1072c07c9c7e800eab0ce9cbe8481c455a3 Mon Sep 17 00:00:00 2001 From: Zhang Wensheng Date: Sat, 16 Apr 2022 07:27:58 +0000 Subject: [PATCH 222/340] nbd: fix possible overflow on 'first_minor' in nbd_dev_add() hulk inclusion category: bugfix, https://gitee.com/openeuler/kernel/issues/I51ABL bugzilla: 186386 CVE: NA -------------------------------- When 'index' is a big numbers, it may become negative which forced to 'int'. then 'index << part_shift' might overflow to a positive value that is not greater than '0xfffff', then sysfs might complains about duplicate creation. Because of this, move the 'index' judgment to the front will fix it and be better. Fixes: b0d9111a2d53 ("nbd: use an idr to keep track of nbd devices") Fixes: 940c264984fd ("nbd: fix possible overflow for 'first_minor' in nbd_dev_add()") Signed-off-by: Zhang Wensheng Reviewed-by: Jason Yan Signed-off-by: Yongqiang Liu --- drivers/block/nbd.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a35189098581..c2e5ba599418 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1704,14 +1704,6 @@ static int nbd_dev_add(int index) int err = -ENOMEM; int first_minor = index << part_shift; - /* - * Too big index can cause duplicate creation of sysfs files/links, - * because MKDEV() expect that the max first minor is MINORMASK, or - * index << part_shift can overflow. 
- */ - if (first_minor < index || first_minor > MINORMASK) - return -EINVAL; - nbd = kzalloc(sizeof(struct nbd_device), GFP_KERNEL); if (!nbd) goto out; @@ -1844,8 +1836,20 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - if (info->attrs[NBD_ATTR_INDEX]) + if (info->attrs[NBD_ATTR_INDEX]) { index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + + /* + * Too big first_minor can cause duplicate creation of + * sysfs files/links, since index << part_shift might + * overflow, or MKDEV() expect that the max bits of + * first_minor is 20. + */ + if (index < 0 || index > MINORMASK >> part_shift) { + printk(KERN_ERR "nbd: illegal input index %d\n", index); + return -EINVAL; + } + } if (!info->attrs[NBD_ATTR_SOCKETS]) { printk(KERN_ERR "nbd: must specify at least one socket\n"); return -EINVAL; -- Gitee From d9c6a251740db75a3b8b69eb4e0cd784d57794f7 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 16 Apr 2022 09:17:54 +0000 Subject: [PATCH 223/340] netfilter: nf_tables: initialize registers in nft_do_chain() mainline inclusion from mainline-v5.18-rc1 commit 4c905f6740a365464e91467aa50916555b28213d category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I50WAZ CVE: CVE-2022-1016 ------------------------------------------------- Initialize registers to avoid stack leak into userspace. Fixes: 96518518cc41 ("netfilter: add nftables") Signed-off-by: Pablo Neira Ayuso conflict: net/netfilter/nf_tables_core.c Signed-off-by: Lu Wei luwei32@huawei.com Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/netfilter/nf_tables_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index a3850414dba2..7dfaad783cd5 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -144,7 +144,7 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_rule *const *rules; const struct nft_rule *rule; const struct nft_expr *expr, *last; - struct nft_regs regs; + struct nft_regs regs = {}; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; bool genbit = READ_ONCE(net->nft.gencursor); -- Gitee From 742a0b5b67461e695f406a3b028860cfef1ae838 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Tue, 19 Apr 2022 01:27:54 +0000 Subject: [PATCH 224/340] sched/fair: Fix wrong cpu selecting from isolated domain mainline inclusion from mainline-v5.10-rc1 commit df3cb4ea1fb63ff326488efd671ba3c39034255e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I52QKB CVE: NA -------------------------------- We've met problems that occasionally tasks with full cpumask (e.g. by putting it into a cpuset or setting to full affinity) were migrated to our isolated cpus in production environment. After some analysis, we found that it is due to the current select_idle_smt() not considering the sched_domain mask. Steps to reproduce on my 31-CPU hyperthreads machine: 1. with boot parameter: "isolcpus=domain,2-31" (thread lists: 0,16 and 1,17) 2. cgcreate -g cpu:test; cgexec -g cpu:test "test_threads" 3. some threads will be migrated to the isolated cpu16~17. Fix it by checking the valid domain mask in select_idle_smt(). 
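To make the effect of the change easier to see before reading the diff, here is a minimal sketch of the condition the fix enforces; smt_sibling_is_candidate() is a made-up helper name used only for illustration, the real patch simply adds the second test inline in select_idle_smt().

#include <linux/cpumask.h>
#include <linux/sched.h>
#include <linux/sched/topology.h>

/*
 * Illustrative helper (not in the patch): a sibling CPU is a valid
 * target only if the task's affinity allows it *and* it lies inside
 * the sched_domain being searched. CPUs isolated with
 * isolcpus=domain,... are excluded from every domain span, so the
 * second test is what keeps them from being selected.
 */
static bool smt_sibling_is_candidate(struct task_struct *p,
				     struct sched_domain *sd, int cpu)
{
	if (!cpumask_test_cpu(cpu, &p->cpus_allowed))
		return false;	/* task affinity forbids this CPU */
	if (!cpumask_test_cpu(cpu, sched_domain_span(sd)))
		return false;	/* CPU sits outside the domain, e.g. isolated */
	return true;
}

With only the first test, a task whose cpumask covers all CPUs (for example one placed in a cpuset with a full mask) also passes for an isolated sibling, which is exactly the misplacement described above.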
Fixes: 10e2f1acd010 ("sched/core: Rewrite and improve select_idle_siblings()) Reported-by: Wetp Zhang Signed-off-by: Xunlei Pang Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Jiang Biao Reviewed-by: Vincent Guittot Link: https://lkml.kernel.org/r/1600930127-76857-1-git-send-email-xlpang@linux.alibaba.com Conflicts: kernel/sched/fair.c Signed-off-by: Yu jiahua Reviewed-by: zheng zucheng Reviewed-by: zheng zucheng Reviewed-by: Zhang Qiao Signed-off-by: Yongqiang Liu --- kernel/sched/fair.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 4c7c0144a852..89937f7ca0bd 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6256,7 +6256,8 @@ static int select_idle_smt(struct task_struct *p, struct sched_domain *sd, int t return -1; for_each_cpu(cpu, cpu_smt_mask(target)) { - if (!cpumask_test_cpu(cpu, &p->cpus_allowed)) + if (!cpumask_test_cpu(cpu, &p->cpus_allowed) || + !cpumask_test_cpu(cpu, sched_domain_span(sd))) continue; if (available_idle_cpu(cpu) || sched_idle_cpu(cpu)) return cpu; -- Gitee From af98db5ff58f3657d68ac5f744de3c9ad69388ac Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 19 Apr 2022 01:27:55 +0000 Subject: [PATCH 225/340] sched: Fix yet more sched_fork() races mainline inclusion from mainline-v5.17-rc5 commit b1e8206582f9d680cff7d04828708c8b6ab32957 category: bugfix bugzilla: 186609, https://gitee.com/openeuler/kernel/issues/I532B0 CVE: NA -------------------------------- Where commit 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group") fixed a fork race vs cgroup, it opened up a race vs syscalls by not placing the task on the runqueue before it gets exposed through the pidhash. Commit 13765de8148f ("sched/fair: Fix fault in reweight_entity") is trying to fix a single instance of this, instead fix the whole class of issues, effectively reverting this commit. Fixes: 4ef0c5c6b5ba ("kernel/sched: Fix sched_fork() access an invalid sched_task_group") Reported-by: Linus Torvalds Signed-off-by: Peter Zijlstra (Intel) Tested-by: Tadeusz Struk Tested-by: Zhang Qiao Tested-by: Dietmar Eggemann Link: https://lkml.kernel.org/r/YgoeCbwj5mbCR0qA@hirez.programming.kicks-ass.net conflict: include/linux/sched/task.h kernel/fork.c kernel/sched/core.c Signed-off-by: Zhang Qiao Reviewed-by: Chen Hui Reviewed-by: Wang Weiyang Signed-off-by: Yongqiang Liu --- include/linux/sched/task.h | 2 +- kernel/fork.c | 12 +++++++++++- kernel/sched/core.c | 6 +++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index c8b52b3ec865..1c2c099e393b 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -33,7 +33,7 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); -extern void sched_post_fork(struct task_struct *p); +extern void sched_cgroup_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); diff --git a/kernel/fork.c b/kernel/fork.c index 88463fd56930..231b01eba6e1 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2063,6 +2063,17 @@ static __latent_entropy struct task_struct *copy_process( if (retval) goto bad_fork_free_futex_mutex; + /* + * Now that the cgroups are pinned, re-clone the parent cgroup and put + * the new task on the correct runqueue. All this *before* the task + * becomes visible. 
+ * + * This isn't part of ->can_fork() because while the re-cloning is + * cgroup specific, it unconditionally needs to place the task on a + * runqueue. + */ + sched_cgroup_fork(p); + /* * From this point on we must avoid any synchronous user-space * communication until we take the tasklist-lock. In particular, we do @@ -2171,7 +2182,6 @@ static __latent_entropy struct task_struct *copy_process( proc_fork_connector(p); cgroup_post_fork(p); - sched_post_fork(p); cgroup_threadgroup_change_end(current); perf_event_fork(p); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 496ce71f93a7..b09153710259 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2357,8 +2357,9 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p) return 0; } -void sched_post_fork(struct task_struct *p) +void sched_cgroup_fork(struct task_struct *p) { + unsigned long flags; /* @@ -2369,6 +2370,9 @@ void sched_post_fork(struct task_struct *p) * Silence PROVE_RCU. */ raw_spin_lock_irqsave(&p->pi_lock, flags); +#ifdef CONFIG_CGROUP_SCHED + p->sched_task_group = task_group(current); + #endif rseq_migrate(p); /* * We're setting the CPU for the first time, we don't migrate, -- Gitee From b759debf9a3c0d5baf38aa4575bef80689672b88 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 19 Apr 2022 01:27:56 +0000 Subject: [PATCH 226/340] Revert "ext4: fix file system corrupted when rmdir non empty directory with IO error" hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I52WOM CVE: NA -------------------------------- This reverts commit 35bd50bcc346402fb5bac3e3393876f6c5644ca9. Signed-off-by: Ye Bin Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/ext4/namei.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 84f68e15e853..58dd74b80480 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2750,7 +2750,7 @@ bool ext4_empty_dir(struct inode *inode) */ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) - return false; + return true; de = (struct ext4_dir_entry_2 *) bh->b_data; if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, @@ -2781,7 +2781,7 @@ bool ext4_empty_dir(struct inode *inode) continue; } if (IS_ERR(bh)) - return false; + return true; } de = (struct ext4_dir_entry_2 *) (bh->b_data + (offset & (sb->s_blocksize - 1))); -- Gitee From 5ac697029bbf28b7bd762b3558ec79024c75aa00 Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Tue, 19 Apr 2022 01:27:57 +0000 Subject: [PATCH 227/340] ext4: fix fs corruption when tring to remove a non-empty directory with IO error mainline inclusion from mainline-v5.18-rc1 commit 7aab5c84a0f6ec2290e2ba4a6b245178b1bf949a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I52WOM CVE: NA -------------------------------- We inject IO error when rmdir non empty direcory, then got issue as follows: step1: mkfs.ext4 -F /dev/sda step2: mount /dev/sda test step3: cd test step4: mkdir -p 1/2 step5: rmdir 1 [ 110.920551] ext4_empty_dir: inject fault [ 110.921926] EXT4-fs warning (device sda): ext4_rmdir:3113: inode #12: comm rmdir: empty directory '1' has too many links (3) step6: cd .. step7: umount test step8: fsck.ext4 -f /dev/sda e2fsck 1.42.9 (28-Dec-2013) Pass 1: Checking inodes, blocks, and sizes Pass 2: Checking directory structure Entry '..' in .../??? (13) has deleted/unused inode 12. Clear? yes Pass 3: Checking directory connectivity Unconnected directory inode 13 (...) Connect to /lost+found? 
yes Pass 4: Checking reference counts Inode 13 ref count is 3, should be 2. Fix? yes Pass 5: Checking group summary information /dev/sda: ***** FILE SYSTEM WAS MODIFIED ***** /dev/sda: 12/131072 files (0.0% non-contiguous), 26157/524288 blocks ext4_rmdir if (!ext4_empty_dir(inode)) goto end_rmdir; ext4_empty_dir bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) return true; Now if read directory block failed, 'ext4_empty_dir' will return true, assume directory is empty. Obviously, it will lead to above issue. To solve this issue, if read directory block failed 'ext4_empty_dir' just return false. To avoid making things worse when file system is already corrupted, 'ext4_empty_dir' also return false. Signed-off-by: Ye Bin Cc: stable@kernel.org Link: https://lore.kernel.org/r/20220228024815.3952506-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o conflicts: fs/ext4/namei.c Signed-off-by: Ye Bin Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/ext4/inline.c | 9 ++++----- fs/ext4/namei.c | 10 +++++----- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index abb71ba0b1e6..b5d172bf3318 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -1775,19 +1775,20 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) void *inline_pos; unsigned int offset; struct ext4_dir_entry_2 *de; - bool ret = true; + bool ret = false; err = ext4_get_inode_loc(dir, &iloc); if (err) { EXT4_ERROR_INODE_ERR(dir, -err, "error %d getting inode %lu block", err, dir->i_ino); - return true; + return false; } down_read(&EXT4_I(dir)->xattr_sem); if (!ext4_has_inline_data(dir)) { *has_inline_data = 0; + ret = true; goto out; } @@ -1796,7 +1797,6 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) ext4_warning(dir->i_sb, "bad inline directory (dir #%lu) - no `..'", dir->i_ino); - ret = true; goto out; } @@ -1815,16 +1815,15 @@ bool empty_inline_dir(struct inode *dir, int *has_inline_data) dir->i_ino, le32_to_cpu(de->inode), le16_to_cpu(de->rec_len), de->name_len, inline_size); - ret = true; goto out; } if (le32_to_cpu(de->inode)) { - ret = false; goto out; } offset += ext4_rec_len_from_disk(de->rec_len, inline_size); } + ret = true; out: up_read(&EXT4_I(dir)->xattr_sem); brelse(iloc.bh); diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 58dd74b80480..867deea2fbdb 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2743,14 +2743,14 @@ bool ext4_empty_dir(struct inode *inode) sb = inode->i_sb; if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2)) { EXT4_ERROR_INODE(inode, "invalid size"); - return true; + return false; } /* The first directory block must not be a hole, * so treat it as DIRENT_HTREE */ bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE); if (IS_ERR(bh)) - return true; + return false; de = (struct ext4_dir_entry_2 *) bh->b_data; if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size, @@ -2758,7 +2758,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) { ext4_warning_inode(inode, "directory missing '.'"); brelse(bh); - return true; + return false; } offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); de = ext4_next_entry(de, sb->s_blocksize); @@ -2767,7 +2767,7 @@ bool ext4_empty_dir(struct inode *inode) le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) { ext4_warning_inode(inode, "directory missing '..'"); brelse(bh); - return true; + return false; } offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize); while 
(offset < inode->i_size) { @@ -2781,7 +2781,7 @@ bool ext4_empty_dir(struct inode *inode) continue; } if (IS_ERR(bh)) - return true; + return false; } de = (struct ext4_dir_entry_2 *) (bh->b_data + (offset & (sb->s_blocksize - 1))); -- Gitee From 2a98e82a07909c97bc33d47e895b76dc9ca4a4fc Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Wed, 20 Apr 2022 11:53:20 +0000 Subject: [PATCH 228/340] mm/memory.c: fix clear_gigantic_page_chunk hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5323R CVE: NA -------------------------------- Patch "mm: parallelize clear_gigantic_page" put mem_map_next() after clear_user_highpage but not modify the parameter i. So it's actually the previous page being cleared in each loop. As a result, the last page has not been cleared. Fix it by put mem_map_next() back to the original position. Fixes: ae0cd4d46ced ("mm: parallelize clear_gigantic_page") Signed-off-by: Liu Shixin Reviewed-by: Wei Li Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/memory.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index d4853970a7c1..98c2872f8415 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4726,11 +4726,9 @@ static int clear_gigantic_page_chunk(unsigned long start, unsigned long end, unsigned long i; might_sleep(); - for (i = start; i < end; ++i) { + for (i = start; i < end; i++, p = mem_map_next(p, base_page, i)) { cond_resched(); clear_user_highpage(p, addr + i * PAGE_SIZE); - - p = mem_map_next(p, base_page, i); } return KTASK_RETURN_SUCCESS; -- Gitee From b5aab4b87f82f5f3d78053df16faa72c62457c7c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 20 Apr 2022 11:53:21 +0000 Subject: [PATCH 229/340] x86/speculation: Merge one test in spectre_v2_user_select_mitigation() stable inclusion from stable-v4.19.234 commit d86fc674172edc0794f84198405f59f00b71512a category: bugfix bugzilla: 186453, https://gitee.com/src-openeuler/kernel/issues/I50WBM CVE: CVE-2022-0001 -------------------------------- commit a5ce9f2bb665d1d2b31f139a02dbaa2dfbb62fa6 upstream. Merge the test whether the CPU supports STIBP into the test which determines whether STIBP is required. Thus try to simplify what is already an insane logic. Remove a superfluous newline in a comment, while at it. Signed-off-by: Borislav Petkov Cc: Anthony Steinhauser Link: https://lkml.kernel.org/r/20200615065806.GB14668@zn.tnic [fllinden@amazon.com: fixed contextual conflict (comment) for 4.19] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman Conflicts: arch/x85/kernel/cpu/bugs.c Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/x86/kernel/cpu/bugs.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 54ec900bd259..7dd614930801 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -756,10 +756,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } /* - * If enhanced IBRS is enabled or SMT impossible, STIBP is not + * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not * required. 
*/ - if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + if (!boot_cpu_has(X86_FEATURE_STIBP) || + !smt_possible || + spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; /* @@ -771,12 +773,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON)) mode = SPECTRE_V2_USER_STRICT_PREFERRED; - /* - * If STIBP is not available, clear the STIBP mode. - */ - if (!boot_cpu_has(X86_FEATURE_STIBP)) - mode = SPECTRE_V2_USER_NONE; - spectre_v2_user_stibp = mode; set_mode: -- Gitee From 989c0a1dc345ef7e97c64e38957b58b236f2a262 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Apr 2022 11:53:22 +0000 Subject: [PATCH 230/340] x86,bugs: Unconditionally allow spectre_v2=retpoline,amd stable inclusion from stable-v4.19.234 commit 1dd7efcfcccee59c974f56848739d4f5a5a570c7 category: bugfix bugzilla: 186453, https://gitee.com/src-openeuler/kernel/issues/I50WBM CVE: CVE-2022-0001 -------------------------------- commit f8a66d608a3e471e1202778c2a36cbdc96bae73b upstream. Currently Linux prevents usage of retpoline,amd on !AMD hardware, this is unfriendly and gets in the way of testing. Remove this restriction. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Borislav Petkov Acked-by: Josh Poimboeuf Tested-by: Alexei Starovoitov Link: https://lore.kernel.org/r/20211026120310.487348118@infradead.org [fllinden@amazon.com: backported to 4.19 (no Hygon in 4.19)] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman Conflicts: arch/x85/kernel/cpu/bugs.c Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/x86/kernel/cpu/bugs.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7dd614930801..791f7ee58b82 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -839,13 +839,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (cmd == SPECTRE_V2_CMD_RETPOLINE_AMD && - boot_cpu_data.x86_vendor != X86_VENDOR_HYGON && - boot_cpu_data.x86_vendor != X86_VENDOR_AMD) { - pr_err("retpoline,amd selected but CPU is not AMD. Switching to AUTO select\n"); - return SPECTRE_V2_CMD_AUTO; - } - spec_v2_print_cond(mitigation_options[i].option, mitigation_options[i].secure); return cmd; -- Gitee From 827e6afa42b1bdf60c02583061237a254512e441 Mon Sep 17 00:00:00 2001 From: "Peter Zijlstra (Intel)" Date: Wed, 20 Apr 2022 11:53:23 +0000 Subject: [PATCH 231/340] x86/speculation: Rename RETPOLINE_AMD to RETPOLINE_LFENCE stable inclusion from stable-v4.19.234 commit 25440a8c77dd2fde6a8e9cfc0c616916febf408e category: bugfix bugzilla: 186453, https://gitee.com/src-openeuler/kernel/issues/I50WBM CVE: CVE-2022-0001 -------------------------------- commit d45476d9832409371537013ebdd8dc1a7781f97a upstream. The RETPOLINE_AMD name is unfortunate since it isn't necessarily AMD only, in fact Hygon also uses it. Furthermore it will likely be sufficient for some Intel processors. Therefore rename the thing to RETPOLINE_LFENCE to better describe what it is. Add the spectre_v2=retpoline,lfence option as an alias to spectre_v2=retpoline,amd to preserve existing setups. However, the output of /sys/devices/system/cpu/vulnerabilities/spectre_v2 will be changed. [ bp: Fix typos, massage. 
] Co-developed-by: Josh Poimboeuf Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner [fllinden@amazon.com: backported to 4.19] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman Conflicts: arch/x85/kernel/cpu/bugs.c Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/nospec-branch.h | 12 +++++----- arch/x86/kernel/cpu/bugs.c | 29 +++++++++++++++--------- tools/arch/x86/include/asm/cpufeatures.h | 2 +- 4 files changed, 26 insertions(+), 19 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 0a4145100c3a..a75ef95455d9 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -224,7 +224,7 @@ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index e3f70c60e8cc..5bc65944fd6e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -119,7 +119,7 @@ ANNOTATE_NOSPEC_ALTERNATIVE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_LFENCE #else jmp *\reg #endif @@ -130,7 +130,7 @@ ANNOTATE_NOSPEC_ALTERNATIVE ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_LFENCE #else call *\reg #endif @@ -181,7 +181,7 @@ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #else /* CONFIG_X86_32 */ @@ -211,7 +211,7 @@ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_AMD) + X86_FEATURE_RETPOLINE_LFENCE) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) #endif @@ -223,8 +223,8 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_GENERIC, - SPECTRE_V2_RETPOLINE_AMD, + SPECTRE_V2_RETPOLINE, + SPECTRE_V2_LFENCE, SPECTRE_V2_IBRS_ENHANCED, }; diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 791f7ee58b82..09d03300a626 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -621,7 +621,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_FORCE, SPECTRE_V2_CMD_RETPOLINE, SPECTRE_V2_CMD_RETPOLINE_GENERIC, - 
SPECTRE_V2_CMD_RETPOLINE_AMD, + SPECTRE_V2_CMD_RETPOLINE_LFENCE, }; enum spectre_v2_user_cmd { @@ -781,8 +781,8 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", + [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", }; @@ -794,7 +794,8 @@ static const struct { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, + { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, }; @@ -832,13 +833,19 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || - cmd == SPECTRE_V2_CMD_RETPOLINE_AMD || + cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && !IS_ENABLED(CONFIG_RETPOLINE)) { pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } + if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE) && + !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n", mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + spec_v2_print_cond(mitigation_options[i].option, mitigation_options[i].secure); return cmd; @@ -873,9 +880,9 @@ static void __init spectre_v2_select_mitigation(void) if (IS_ENABLED(CONFIG_RETPOLINE)) goto retpoline_auto; break; - case SPECTRE_V2_CMD_RETPOLINE_AMD: + case SPECTRE_V2_CMD_RETPOLINE_LFENCE: if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_amd; + goto retpoline_lfence; break; case SPECTRE_V2_CMD_RETPOLINE_GENERIC: if (IS_ENABLED(CONFIG_RETPOLINE)) @@ -892,17 +899,17 @@ static void __init spectre_v2_select_mitigation(void) retpoline_auto: if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - retpoline_amd: + retpoline_lfence: if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } - mode = SPECTRE_V2_RETPOLINE_AMD; - setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); + mode = SPECTRE_V2_LFENCE; + setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } else { retpoline_generic: - mode = SPECTRE_V2_RETPOLINE_GENERIC; + mode = SPECTRE_V2_RETPOLINE; setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 765df93de0c6..04a8f0c17609 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -203,7 +203,7 @@ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ #define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */ -#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ +#define 
X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCEs for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ #define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ -- Gitee From 5aa35f5ddfd7545a42e28df8fc76936efe7a5f72 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Apr 2022 11:53:24 +0000 Subject: [PATCH 232/340] x86/speculation: Add eIBRS + Retpoline options stable inclusion from stable-v4.19.234 commit 3f66bedb96ff4c064a819e68499f79b38297ba26 category: bugfix bugzilla: 186453, https://gitee.com/src-openeuler/kernel/issues/I50WBM CVE: CVE-2022-0001 -------------------------------- commit 1e19da8522c81bf46b335f84137165741e0d82b7 upstream. Thanks to the chaps at VUsec it is now clear that eIBRS is not sufficient, therefore allow enabling of retpolines along with eIBRS. Add spectre_v2=eibrs, spectre_v2=eibrs,lfence and spectre_v2=eibrs,retpoline options to explicitly pick your preferred means of mitigation. Since there's new mitigations there's also user visible changes in /sys/devices/system/cpu/vulnerabilities/spectre_v2 to reflect these new mitigations. [ bp: Massage commit message, trim error messages, do more precise eIBRS mode checking. ] Co-developed-by: Josh Poimboeuf Signed-off-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Patrick Colp Reviewed-by: Thomas Gleixner [fllinden@amazon.com: backported to 4.19 (no Hygon)] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman Conflicts: arch/x85/kernel/cpu/bugs.c Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/x86/include/asm/nospec-branch.h | 4 +- arch/x86/kernel/cpu/bugs.c | 132 +++++++++++++++++++-------- 2 files changed, 98 insertions(+), 38 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 5bc65944fd6e..ddcd4d5eecec 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -225,7 +225,9 @@ enum spectre_v2_mitigation { SPECTRE_V2_NONE, SPECTRE_V2_RETPOLINE, SPECTRE_V2_LFENCE, - SPECTRE_V2_IBRS_ENHANCED, + SPECTRE_V2_EIBRS, + SPECTRE_V2_EIBRS_RETPOLINE, + SPECTRE_V2_EIBRS_LFENCE, }; /* The indirect branch speculation control variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 09d03300a626..9da94659e89f 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -622,6 +622,9 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE, SPECTRE_V2_CMD_RETPOLINE_GENERIC, SPECTRE_V2_CMD_RETPOLINE_LFENCE, + SPECTRE_V2_CMD_EIBRS, + SPECTRE_V2_CMD_EIBRS_RETPOLINE, + SPECTRE_V2_CMD_EIBRS_LFENCE, }; enum spectre_v2_user_cmd { @@ -694,6 +697,13 @@ spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) return SPECTRE_V2_USER_CMD_AUTO; } +static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode) +{ + return (mode == SPECTRE_V2_EIBRS || + mode == SPECTRE_V2_EIBRS_RETPOLINE || + mode == SPECTRE_V2_EIBRS_LFENCE); +} + static void __init spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { @@ -761,7 +771,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) */ if (!boot_cpu_has(X86_FEATURE_STIBP) || !smt_possible || - spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return; /* @@ 
-783,7 +793,9 @@ static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE] = "Mitigation: Retpolines", [SPECTRE_V2_LFENCE] = "Mitigation: LFENCE", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS", + [SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE", + [SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines", }; static const struct { @@ -797,6 +809,9 @@ static const struct { { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, { "retpoline,lfence", SPECTRE_V2_CMD_RETPOLINE_LFENCE, false }, { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "eibrs", SPECTRE_V2_CMD_EIBRS, false }, + { "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false }, + { "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false }, { "auto", SPECTRE_V2_CMD_AUTO, false }, }; @@ -834,15 +849,29 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if ((cmd == SPECTRE_V2_CMD_RETPOLINE || cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || - cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC) && + cmd == SPECTRE_V2_CMD_RETPOLINE_GENERIC || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && !IS_ENABLED(CONFIG_RETPOLINE)) { - pr_err("%s selected but not compiled in. Switching to AUTO select\n", mitigation_options[i].option); + pr_err("%s selected but not compiled in. Switching to AUTO select\n", + mitigation_options[i].option); + return SPECTRE_V2_CMD_AUTO; + } + + if ((cmd == SPECTRE_V2_CMD_EIBRS || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { + pr_err("%s selected but CPU doesn't have eIBRS. Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } - if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE) && + if ((cmd == SPECTRE_V2_CMD_RETPOLINE_LFENCE || + cmd == SPECTRE_V2_CMD_EIBRS_LFENCE) && !boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("%s selected, but CPU doesn't have a serializing LFENCE. Switching to AUTO select\n", mitigation_options[i].option); + pr_err("%s selected, but CPU doesn't have a serializing LFENCE. 
Switching to AUTO select\n", + mitigation_options[i].option); return SPECTRE_V2_CMD_AUTO; } @@ -851,6 +880,24 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } +static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) +{ + if (!IS_ENABLED(CONFIG_RETPOLINE)) { + pr_err("Kernel not compiled with retpoline; no mitigation available!"); + return SPECTRE_V2_NONE; + } + + if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { + if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { + pr_err("LFENCE not serializing, switching to generic retpoline\n"); + return SPECTRE_V2_RETPOLINE; + } + return SPECTRE_V2_LFENCE; + } + + return SPECTRE_V2_RETPOLINE; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -871,49 +918,60 @@ static void __init spectre_v2_select_mitigation(void) case SPECTRE_V2_CMD_FORCE: case SPECTRE_V2_CMD_AUTO: if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED)) { - mode = SPECTRE_V2_IBRS_ENHANCED; - /* Force it so VMEXIT will restore correctly */ - x86_spec_ctrl_base |= SPEC_CTRL_IBRS; - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); - goto specv2_set_mode; + mode = SPECTRE_V2_EIBRS; + break; } - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + + mode = spectre_v2_select_retpoline(); break; + case SPECTRE_V2_CMD_RETPOLINE_LFENCE: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_lfence; + mode = SPECTRE_V2_LFENCE; break; + case SPECTRE_V2_CMD_RETPOLINE_GENERIC: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_generic; + mode = SPECTRE_V2_RETPOLINE; break; + case SPECTRE_V2_CMD_RETPOLINE: - if (IS_ENABLED(CONFIG_RETPOLINE)) - goto retpoline_auto; + mode = spectre_v2_select_retpoline(); + break; + + case SPECTRE_V2_CMD_EIBRS: + mode = SPECTRE_V2_EIBRS; + break; + + case SPECTRE_V2_CMD_EIBRS_LFENCE: + mode = SPECTRE_V2_EIBRS_LFENCE; + break; + + case SPECTRE_V2_CMD_EIBRS_RETPOLINE: + mode = SPECTRE_V2_EIBRS_RETPOLINE; break; } - pr_err("Spectre mitigation: kernel not compiled with retpoline; no mitigation available!"); - return; -retpoline_auto: - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || - boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - retpoline_lfence: - if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); - goto retpoline_generic; - } - mode = SPECTRE_V2_LFENCE; + if (spectre_v2_in_eibrs_mode(mode)) { + /* Force it so VMEXIT will restore correctly */ + x86_spec_ctrl_base |= SPEC_CTRL_IBRS; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + } + + switch (mode) { + case SPECTRE_V2_NONE: + case SPECTRE_V2_EIBRS: + break; + + case SPECTRE_V2_LFENCE: + case SPECTRE_V2_EIBRS_LFENCE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE); + /* fallthrough */ + + case SPECTRE_V2_RETPOLINE: + case SPECTRE_V2_EIBRS_RETPOLINE: setup_force_cpu_cap(X86_FEATURE_RETPOLINE); - } else { - retpoline_generic: - mode = SPECTRE_V2_RETPOLINE; - setup_force_cpu_cap(X86_FEATURE_RETPOLINE); + break; } -specv2_set_mode: spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -939,7 +997,7 @@ static void __init spectre_v2_select_mitigation(void) * the CPU supports Enhanced IBRS, kernel might un-intentionally not * enable IBRS around firmware calls. 
*/ - if (boot_cpu_has(X86_FEATURE_IBRS) && mode != SPECTRE_V2_IBRS_ENHANCED) { + if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) { setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW); pr_info("Enabling Restricted Speculation for firmware calls\n"); } @@ -1584,7 +1642,7 @@ static ssize_t tsx_async_abort_show_state(char *buf) static char *stibp_state(void) { - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + if (spectre_v2_in_eibrs_mode(spectre_v2_enabled)) return ""; switch (spectre_v2_user_stibp) { -- Gitee From f72da20eb6593c02cef40233f34c6f9db89b1489 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 20 Apr 2022 11:53:25 +0000 Subject: [PATCH 233/340] Documentation/hw-vuln: Update spectre doc stable inclusion from stable-v4.19.234 commit 7af95ef3ec6248696300fce5c68f6c8c4f50e4a4 category: bugfix bugzilla: 186453, https://gitee.com/src-openeuler/kernel/issues/I50WBM CVE: CVE-2022-0001 -------------------------------- commit 5ad3eb1132453b9795ce5fd4572b1c18b292cca9 upstream. Update the doc with the new fun. [ bp: Massage commit message. ] Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner [fllinden@amazon.com: backported to 4.19] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- Documentation/admin-guide/hw-vuln/spectre.rst | 42 +++++++++++++------ .../admin-guide/kernel-parameters.txt | 8 +++- 2 files changed, 35 insertions(+), 15 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index e05e581af5cf..4776c112fe7d 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -131,6 +131,19 @@ steer its indirect branch speculations to gadget code, and measure the speculative execution's side effects left in level 1 cache to infer the victim's data. +Yet another variant 2 attack vector is for the attacker to poison the +Branch History Buffer (BHB) to speculatively steer an indirect branch +to a specific Branch Target Buffer (BTB) entry, even if the entry isn't +associated with the source address of the indirect branch. Specifically, +the BHB might be shared across privilege levels even in the presence of +Enhanced IBRS. + +Currently the only known real-world BHB attack vector is via +unprivileged eBPF. Therefore, it's highly recommended to not enable +unprivileged eBPF, especially when eIBRS is used (without retpolines). +For a full mitigation against BHB attacks, it's recommended to use +retpolines (or eIBRS combined with retpolines). 
+ Attack scenarios ---------------- @@ -364,13 +377,15 @@ The possible values in this file are: - Kernel status: - ==================================== ================================= - 'Not affected' The processor is not vulnerable - 'Vulnerable' Vulnerable, no mitigation - 'Mitigation: Full generic retpoline' Software-focused mitigation - 'Mitigation: Full AMD retpoline' AMD-specific software mitigation - 'Mitigation: Enhanced IBRS' Hardware-focused mitigation - ==================================== ================================= + ======================================== ================================= + 'Not affected' The processor is not vulnerable + 'Mitigation: None' Vulnerable, no mitigation + 'Mitigation: Retpolines' Use Retpoline thunks + 'Mitigation: LFENCE' Use LFENCE instructions + 'Mitigation: Enhanced IBRS' Hardware-focused mitigation + 'Mitigation: Enhanced IBRS + Retpolines' Hardware-focused + Retpolines + 'Mitigation: Enhanced IBRS + LFENCE' Hardware-focused + LFENCE + ======================================== ================================= - Firmware status: Show if Indirect Branch Restricted Speculation (IBRS) is used to protect against Spectre variant 2 attacks when calling firmware (x86 only). @@ -584,12 +599,13 @@ kernel command line. Specific mitigations can also be selected manually: - retpoline - replace indirect branches - retpoline,generic - google's original retpoline - retpoline,amd - AMD-specific minimal thunk + retpoline auto pick between generic,lfence + retpoline,generic Retpolines + retpoline,lfence LFENCE; indirect branch + retpoline,amd alias for retpoline,lfence + eibrs enhanced IBRS + eibrs,retpoline enhanced IBRS + Retpolines + eibrs,lfence enhanced IBRS + LFENCE Not specifying this option is equivalent to spectre_v2=auto. diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 170e25b192a1..212832a5ca68 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4413,8 +4413,12 @@ Specific mitigations can also be selected manually: retpoline - replace indirect branches - retpoline,generic - google's original retpoline - retpoline,amd - AMD-specific minimal thunk + retpoline,generic - Retpolines + retpoline,lfence - LFENCE; indirect branch + retpoline,amd - alias for retpoline,lfence + eibrs - enhanced IBRS + eibrs,retpoline - enhanced IBRS + Retpolines + eibrs,lfence - enhanced IBRS + LFENCE Not specifying this option is equivalent to spectre_v2=auto. -- Gitee From 8fbdf654b00fdf629be3b94c4f64182b9522774a Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 20 Apr 2022 11:53:26 +0000 Subject: [PATCH 234/340] x86/speculation: Include unprivileged eBPF status in Spectre v2 mitigation reporting stable inclusion from stable-v4.19.234 commit 995629e1d8e6751936c6e2b738f70b392b0461de category: bugfix bugzilla: 186453, https://gitee.com/src-openeuler/kernel/issues/I50WBM CVE: CVE-2022-0001 -------------------------------- commit 44a3918c8245ab10c6c9719dd12e7a8d291980d8 upstream. With unprivileged eBPF enabled, eIBRS (without retpoline) is vulnerable to Spectre v2 BHB-based attacks. When both are enabled, print a warning message and report it in the 'spectre_v2' sysfs vulnerabilities file. 
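For illustration only (not part of this series): the state these patches report can be inspected from userspace through the sysfs file named above. A minimal C sketch, assuming only that the file exists and holds a single line:

  /* Print the Spectre v2 status string the kernel exports via sysfs. */
  #include <stdio.h>

  int main(void)
  {
      const char *path = "/sys/devices/system/cpu/vulnerabilities/spectre_v2";
      char line[256];
      FILE *f = fopen(path, "r");

      if (!f) {
          perror(path);
          return 1;
      }
      if (fgets(line, sizeof(line), f))
          fputs(line, stdout);   /* e.g. one of the spectre_v2_strings entries */
      fclose(f);
      return 0;
  }

The mode that ends up being reported can be chosen on the kernel command line with the spectre_v2= options documented above, for example spectre_v2=eibrs,retpoline.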
Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner [fllinden@amazon.com: backported to 4.19] Signed-off-by: Frank van der Linden Signed-off-by: Greg Kroah-Hartman Conflicts: kernel/sysctl.c Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++++++++------ include/linux/bpf.h | 11 +++++++++++ 2 files changed, 40 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9da94659e89f..9a9e5ac44bd5 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "cpu.h" @@ -607,6 +608,16 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" + +#ifdef CONFIG_BPF_SYSCALL +void unpriv_ebpf_notify(int new_state) +{ + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && !new_state) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); +} +#endif + static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -950,6 +961,9 @@ static void __init spectre_v2_select_mitigation(void) break; } + if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + if (spectre_v2_in_eibrs_mode(mode)) { /* Force it so VMEXIT will restore correctly */ x86_spec_ctrl_base |= SPEC_CTRL_IBRS; @@ -1672,6 +1686,20 @@ static char *ibpb_state(void) return ""; } +static ssize_t spectre_v2_show_state(char *buf) +{ + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + + return sprintf(buf, "%s%s%s%s%s%s\n", + spectre_v2_strings[spectre_v2_enabled], + ibpb_state(), + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + stibp_state(), + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + spectre_v2_module_string()); +} + static ssize_t srbds_show_state(char *buf) { return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]); @@ -1697,12 +1725,7 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "%s\n", spectre_v1_strings[spectre_v1_mitigation]); case X86_BUG_SPECTRE_V2: - return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? 
", RSB filling" : "", - spectre_v2_module_string()); + return spectre_v2_show_state(buf); case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c40fa25cf2ca..30fcf812df9d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -546,6 +546,11 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); int array_map_alloc_check(union bpf_attr *attr); +static inline bool unprivileged_ebpf_enabled(void) +{ + return !sysctl_unprivileged_bpf_disabled; +} + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -657,6 +662,12 @@ static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, { return ERR_PTR(-EOPNOTSUPP); } + +static inline bool unprivileged_ebpf_enabled(void) +{ + return false; +} + #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, -- Gitee From 802b5d021cbd79845326b08da67e6c73d8e5896f Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 20 Apr 2022 11:53:27 +0000 Subject: [PATCH 235/340] x86/speculation: Use generic retpoline by default on AMD stable inclusion from stable-v4.19.234 commit d3cb3a6927222268a10b2f12dfb8c9444f7cc39e category: bugfix bugzilla: 186453, https://gitee.com/src-openeuler/kernel/issues/I50WBM CVE: CVE-2022-0001 -------------------------------- commit 244d00b5dd4755f8df892c86cab35fb2cfd4f14b upstream. AMD retpoline may be susceptible to speculation. The speculation execution window for an incorrect indirect branch prediction using LFENCE/JMP sequence may potentially be large enough to allow exploitation using Spectre V2. By default, don't use retpoline,lfence on AMD. Instead, use the generic retpoline. Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/x86/kernel/cpu/bugs.c | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9a9e5ac44bd5..dc9ffb9b7615 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -898,14 +898,6 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_NONE; } - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (!boot_cpu_has(X86_FEATURE_LFENCE_RDTSC)) { - pr_err("LFENCE not serializing, switching to generic retpoline\n"); - return SPECTRE_V2_RETPOLINE; - } - return SPECTRE_V2_LFENCE; - } - return SPECTRE_V2_RETPOLINE; } -- Gitee From de9842d50028c34b59eae563bdb7628564c6f05d Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Wed, 20 Apr 2022 11:53:28 +0000 Subject: [PATCH 236/340] x86/speculation: Update link to AMD speculation whitepaper stable inclusion from stable-v4.19.234 commit c034d344e733a3ac574dd09e39e911a50025c607 category: bugfix bugzilla: 186453, https://gitee.com/src-openeuler/kernel/issues/I50WBM CVE: CVE-2022-0001 -------------------------------- commit e9b6013a7ce31535b04b02ba99babefe8a8599fa upstream. Update the link to the "Software Techniques for Managing Speculation on AMD Processors" whitepaper. 
Signed-off-by: Kim Phillips Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- Documentation/admin-guide/hw-vuln/spectre.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/hw-vuln/spectre.rst b/Documentation/admin-guide/hw-vuln/spectre.rst index 4776c112fe7d..3a6e1fcad3c0 100644 --- a/Documentation/admin-guide/hw-vuln/spectre.rst +++ b/Documentation/admin-guide/hw-vuln/spectre.rst @@ -60,8 +60,8 @@ privileged data touched during the speculative execution. Spectre variant 1 attacks take advantage of speculative execution of conditional branches, while Spectre variant 2 attacks use speculative execution of indirect branches to leak privileged memory. -See :ref:`[1] ` :ref:`[5] ` :ref:`[7] ` -:ref:`[10] ` :ref:`[11] `. +See :ref:`[1] ` :ref:`[5] ` :ref:`[6] ` +:ref:`[7] ` :ref:`[10] ` :ref:`[11] `. Spectre variant 1 (Bounds Check Bypass) --------------------------------------- @@ -746,7 +746,7 @@ AMD white papers: .. _spec_ref6: -[6] `Software techniques for managing speculation on AMD processors `_. +[6] `Software techniques for managing speculation on AMD processors `_. ARM white papers: -- Gitee From 50f090e8089195a9df8f9c690103268caa88a7f3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 20 Apr 2022 11:53:29 +0000 Subject: [PATCH 237/340] x86/speculation: Warn about Spectre v2 LFENCE mitigation stable inclusion from stable-v4.19.234 commit 8bfdba77595aee5c3e83ed1c9994c35d6d409605 category: bugfix bugzilla: 186453, https://gitee.com/src-openeuler/kernel/issues/I50WBM CVE: CVE-2022-0001 -------------------------------- commit eafd987d4a82c7bb5aa12f0e3b4f8f3dea93e678 upstream. With: f8a66d608a3e ("x86,bugs: Unconditionally allow spectre_v2=retpoline,amd") it became possible to enable the LFENCE "retpoline" on Intel. However, Intel doesn't recommend it, as it has some weaknesses compared to retpoline. Now AMD doesn't recommend it either. It can still be left available as a cmdline option. It's faster than retpoline but is weaker in certain scenarios -- particularly SMT, but even non-SMT may be vulnerable in some cases. So just unconditionally warn if the user requests it on the cmdline. [ bp: Massage commit message. 
] Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/x86/kernel/cpu/bugs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index dc9ffb9b7615..138bb1b0093e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -608,6 +608,7 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif +#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" #ifdef CONFIG_BPF_SYSCALL @@ -929,6 +930,7 @@ static void __init spectre_v2_select_mitigation(void) break; case SPECTRE_V2_CMD_RETPOLINE_LFENCE: + pr_err(SPECTRE_V2_LFENCE_MSG); mode = SPECTRE_V2_LFENCE; break; @@ -1680,6 +1682,9 @@ static char *ibpb_state(void) static ssize_t spectre_v2_show_state(char *buf) { + if (spectre_v2_enabled == SPECTRE_V2_LFENCE) + return sprintf(buf, "Vulnerable: LFENCE\n"); + if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); -- Gitee From 5308da81e586a2f124b9f5a12818dcba057ab786 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 20 Apr 2022 11:53:30 +0000 Subject: [PATCH 238/340] x86/speculation: Warn about eIBRS + LFENCE + Unprivileged eBPF + SMT stable inclusion from stable-v4.19.234 commit 9711b12a3f4c0fc73dd257c1e467e6e42155a5f1 category: bugfix bugzilla: 186453, https://gitee.com/src-openeuler/kernel/issues/I50WBM CVE: CVE-2022-0001 -------------------------------- commit 0de05d056afdb00eca8c7bbb0c79a3438daf700c upstream. The commit 44a3918c8245 ("x86/speculation: Include unprivileged eBPF status in Spectre v2 mitigation reporting") added a warning for the "eIBRS + unprivileged eBPF" combination, which has been shown to be vulnerable against Spectre v2 BHB-based attacks. However, there's no warning about the "eIBRS + LFENCE retpoline + unprivileged eBPF" combo. The LFENCE adds more protection by shortening the speculation window after a mispredicted branch. That makes an attack significantly more difficult, even with unprivileged eBPF. So at least for now the logic doesn't warn about that combination. But if you then add SMT into the mix, the SMT attack angle weakens the effectiveness of the LFENCE considerably. So extend the "eIBRS + unprivileged eBPF" warning to also include the "eIBRS + LFENCE + unprivileged eBPF + SMT" case. [ bp: Massage commit message. 
] Suggested-by: Alyssa Milburn Signed-off-by: Josh Poimboeuf Signed-off-by: Borislav Petkov Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/x86/kernel/cpu/bugs.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 138bb1b0093e..4651e9db46a6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -610,12 +610,27 @@ static inline const char *spectre_v2_module_string(void) { return ""; } #define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n" #define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n" +#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n" #ifdef CONFIG_BPF_SYSCALL void unpriv_ebpf_notify(int new_state) { - if (spectre_v2_enabled == SPECTRE_V2_EIBRS && !new_state) + if (new_state) + return; + + /* Unprivileged eBPF is enabled */ + + switch (spectre_v2_enabled) { + case SPECTRE_V2_EIBRS: pr_err(SPECTRE_V2_EIBRS_EBPF_MSG); + break; + case SPECTRE_V2_EIBRS_LFENCE: + if (sched_smt_active()) + pr_err(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + break; + default: + break; + } } #endif @@ -1075,6 +1090,10 @@ void arch_smt_update(void) { mutex_lock(&spec_ctrl_mutex); + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + pr_warn_once(SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG); + switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: break; @@ -1686,7 +1705,11 @@ static ssize_t spectre_v2_show_state(char *buf) return sprintf(buf, "Vulnerable: LFENCE\n"); if (spectre_v2_enabled == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled()) - return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + return sprintf(buf, "Vulnerable: eIBRS with unprivileged eBPF\n"); + + if (sched_smt_active() && unprivileged_ebpf_enabled() && + spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) + return sprintf(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], -- Gitee From 357bd9f9f1bb86c9607957a3a959d791f80dcbf4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:31 +0000 Subject: [PATCH 239/340] arm64: entry.S: Add ventry overflow sanity checks stable inclusion from stable-v4.19.236 commit e8bfe29afc09ac77b347540a0f4c789e6530a436 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 4330e2c5c04c27bebf89d34e0bc14e6943413067 upstream. Subsequent patches add even more code to the ventry slots. Ensure kernels that overflow a ventry slot don't get built. 
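The .org directive added below turns a ventry overflow into a build failure rather than a silent runtime problem. A rough C analogue (purely illustrative, assuming a fixed 128-byte slot as implied by .align 7) expresses the same "fail the build" idea with a static assertion:

  #include <assert.h>
  #include <stdint.h>
  #include <stdio.h>

  #define VENTRY_SLOT_SIZE 128            /* bytes per vector entry (.align 7) */

  /* Stand-in for the instruction words emitted into one vector slot. */
  static const uint32_t entry_sequence[24];   /* 24 * 4 = 96 bytes */

  static_assert(sizeof(entry_sequence) <= VENTRY_SLOT_SIZE,
                "vector entry overflows its 128-byte slot");

  int main(void)
  {
      printf("entry uses %zu of %d bytes\n",
             sizeof(entry_sequence), VENTRY_SLOT_SIZE);
      return 0;
  }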
Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/kernel/entry.S | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9a044d425502..33a1cbe98e36 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -70,6 +70,7 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 +.Lventry_start\@: #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 alternative_if ARM64_UNMAP_KERNEL_AT_EL0 .if \el == 0 @@ -127,6 +128,7 @@ alternative_else_nop_endif mrs x0, tpidrro_el0 #endif b el\()\el\()_\label +.org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm .macro tramp_alias, dst, sym @@ -1084,6 +1086,7 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 add x30, x30, #(1b - tramp_vectors) isb ret +.org 1b + 128 // Did we overflow the ventry slot? .endm .macro tramp_exit, regsize = 64 -- Gitee From 7a79d423decaf8a0501343781e2e2480c1470823 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:32 +0000 Subject: [PATCH 240/340] arm64: entry: Make the trampoline cleanup optional stable inclusion from stable-v4.19.236 commit 87eccd56c52fcdd6c55b048d789da5c9c2e51ed3 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit d739da1694a0eaef0358a42b76904b611539b77b upstream. Subsequent patches will add additional sets of vectors that use the same tricks as the kpti vectors to reach the full-fat vectors. The full-fat vectors contain some cleanup for kpti that is patched in by alternatives when kpti is in use. Once there are additional vectors, the cleanup will be needed in more cases. But on big/little systems, the cleanup would be harmful if no trampoline vector were in use. Instead of forcing CPUs that don't need a trampoline vector to use one, make the trampoline cleanup optional. Entry at the top of the vectors will skip the cleanup. The trampoline vectors can then skip the first instruction, triggering the cleanup to run. Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/kernel/entry.S | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 33a1cbe98e36..ad5432e73538 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -72,16 +72,20 @@ .align 7 .Lventry_start\@: #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -alternative_if ARM64_UNMAP_KERNEL_AT_EL0 .if \el == 0 + /* + * This must be the first instruction of the EL0 vector entries. It is + * skipped by the trampoline vectors, to trigger the cleanup. + */ + b .Lskip_tramp_vectors_cleanup\@ .if \regsize == 64 mrs x30, tpidrro_el0 msr tpidrro_el0, xzr .else mov x30, xzr .endif +.Lskip_tramp_vectors_cleanup\@: .endif -alternative_else_nop_endif #endif sub sp, sp, #S_FRAME_SIZE @@ -1083,7 +1087,7 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 #endif prfm plil1strm, [x30, #(1b - tramp_vectors)] msr vbar_el1, x30 - add x30, x30, #(1b - tramp_vectors) + add x30, x30, #(1b - tramp_vectors + 4) isb ret .org 1b + 128 // Did we overflow the ventry slot? 
-- Gitee From c74a988abc79808cdd1c051e00670f541159487b Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:33 +0000 Subject: [PATCH 241/340] arm64: entry: Free up another register on kpti's tramp_exit path stable inclusion from stable-v4.19.236 commit 51acb81130d1feee7fd043760b75f5377ab8d4f0 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 03aff3a77a58b5b52a77e00537a42090ad57b80b upstream. Kpti stashes x30 in far_el1 while it uses x30 for all its work. Making the vectors a per-cpu data structure will require a second register. Allow tramp_exit two registers before it unmaps the kernel, by leaving x30 on the stack, and stashing x29 in far_el1. Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/kernel/entry.S | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ad5432e73538..29d15f5bccdd 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -365,18 +365,20 @@ alternative_else_nop_endif ldp x24, x25, [sp, #16 * 12] ldp x26, x27, [sp, #16 * 13] ldp x28, x29, [sp, #16 * 14] - ldr lr, [sp, #S_LR] - add sp, sp, #S_FRAME_SIZE // restore sp /* * ARCH_HAS_MEMBARRIER_SYNC_CORE rely on eret context synchronization * when returning from IPI handler, and when returning to user-space. */ .if \el == 0 -alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 +alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp + eret +alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 bne 4f - msr far_el1, x30 + msr far_el1, x29 tramp_alias x30, tramp_exit_native br x30 4: @@ -384,6 +386,8 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 br x30 #endif .else + ldr lr, [sp, #S_LR] + add sp, sp, #S_FRAME_SIZE // restore sp eret .endif .endm @@ -1096,10 +1100,12 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 .macro tramp_exit, regsize = 64 adr x30, tramp_vectors msr vbar_el1, x30 - tramp_unmap_kernel x30 + ldr lr, [sp, #S_LR] + tramp_unmap_kernel x29 .if \regsize == 64 - mrs x30, far_el1 + mrs x29, far_el1 .endif + add sp, sp, #S_FRAME_SIZE // restore sp eret .endm -- Gitee From 88f3bf9c621102b8540696b1ce08e5292c6285c6 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:34 +0000 Subject: [PATCH 242/340] arm64: entry: Move the trampoline data page before the text page stable inclusion from stable-v4.19.236 commit 266b1ef1368e06ac4c5a89eb9774ef2bbaa54e19 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit c091fb6ae059cda563b2a4d93fdbc548ef34e1d6 upstream. The trampoline code has a data page that holds the address of the vectors, which is unmapped when running in user-space. This ensures that with CONFIG_RANDOMIZE_BASE, the randomised address of the kernel can't be discovered until after the kernel has been mapped. If the trampoline text page is extended to include multiple sets of vectors, it will be larger than a single page, making it tricky to find the data page without knowing the size of the trampoline text pages, which will vary with PAGE_SIZE. Move the data page to appear before the text page. 
This allows the data page to be found without knowing the size of the trampoline text pages. 'tramp_vectors' is used to refer to the beginning of the .entry.tramp.text section, do that explicitly. Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/include/asm/fixmap.h | 2 +- arch/arm64/kernel/entry.S | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index ec1e6d6fa14c..c0cfc6d3bf9f 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -59,8 +59,8 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - FIX_ENTRY_TRAMP_DATA, FIX_ENTRY_TRAMP_TEXT, + FIX_ENTRY_TRAMP_DATA, #define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 29d15f5bccdd..8de799e92970 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1067,6 +1067,11 @@ alternative_else_nop_endif */ .endm + .macro tramp_data_page dst + adr \dst, .entry.tramp.text + sub \dst, \dst, PAGE_SIZE + .endm + .macro tramp_ventry, regsize = 64 .align 7 1: @@ -1083,7 +1088,7 @@ alternative_else_nop_endif 2: tramp_map_kernel x30 #ifdef CONFIG_RANDOMIZE_BASE - adr x30, tramp_vectors + PAGE_SIZE + tramp_data_page x30 alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 ldr x30, [x30] #else @@ -1231,7 +1236,7 @@ ENTRY(__sdei_asm_entry_trampoline) 1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] #ifdef CONFIG_RANDOMIZE_BASE - adr x4, tramp_vectors + PAGE_SIZE + tramp_data_page x4 add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler ldr x4, [x4] #else -- Gitee From 656f57bdf30b41356f47c132e40577eedfa0253a Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:35 +0000 Subject: [PATCH 243/340] arm64: entry: Allow tramp_alias to access symbols after the 4K boundary stable inclusion from stable-v4.19.236 commit ebcdd80d0016c7445e8395cec99b9ce266a26001 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 6c5bf79b69f911560fbf82214c0971af6e58e682 upstream. Systems using kpti enter and exit the kernel through a trampoline mapping that is always mapped, even when the kernel is not. tramp_valias is a macro to find the address of a symbol in the trampoline mapping. Adding extra sets of vectors will expand the size of the entry.tramp.text section to beyond 4K. tramp_valias will be unable to generate addresses for symbols beyond 4K as it uses the 12 bit immediate of the add instruction. As there are now two registers available when tramp_alias is called, use the extra register to avoid the 4K limit of the 12 bit immediate. 
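Conceptually (a sketch, not the kernel's code), tramp_alias computes alias(sym) = TRAMP_VALIAS + (sym - start of .entry.tramp.text). An add with a 12-bit immediate can only encode offsets up to 4095, so once the section grows past 4K the offset has to be carried in the spare register instead. The addresses below are invented purely for illustration:

  #include <inttypes.h>
  #include <stdint.h>
  #include <stdio.h>

  static uintptr_t tramp_alias(uintptr_t tramp_valias,
                               uintptr_t tramp_text_start, uintptr_t sym)
  {
      uintptr_t offset = sym - tramp_text_start;   /* may now exceed 4095 */

      return tramp_valias + offset;
  }

  int main(void)
  {
      uintptr_t valias = 0x1000;
      uintptr_t text_start = 0x200000;
      uintptr_t sym = text_start + 0x1234;         /* beyond the old 4K reach */

      printf("aliased address: 0x%" PRIxPTR "\n",
             tramp_alias(valias, text_start, sym));
      return 0;
  }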
Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/kernel/entry.S | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 8de799e92970..15e117d7e290 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -135,9 +135,12 @@ .org .Lventry_start\@ + 128 // Did we overflow the ventry slot? .endm - .macro tramp_alias, dst, sym + .macro tramp_alias, dst, sym, tmp mov_q \dst, TRAMP_VALIAS - add \dst, \dst, #(\sym - .entry.tramp.text) + adr_l \tmp, \sym + add \dst, \dst, \tmp + adr_l \tmp, .entry.tramp.text + sub \dst, \dst, \tmp .endm // This macro corrupts x0-x3. It is the caller's duty @@ -379,10 +382,10 @@ alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 bne 4f msr far_el1, x29 - tramp_alias x30, tramp_exit_native + tramp_alias x30, tramp_exit_native, x29 br x30 4: - tramp_alias x30, tramp_exit_compat + tramp_alias x30, tramp_exit_compat, x29 br x30 #endif .else @@ -1365,7 +1368,7 @@ alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 alternative_else_nop_endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline + tramp_alias dst=x5, sym=__sdei_asm_exit_trampoline, tmp=x3 br x5 #endif ENDPROC(__sdei_asm_handler) -- Gitee From c13aede9aa9d5f2fa76794185836401f14c2d76c Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:36 +0000 Subject: [PATCH 244/340] arm64: entry: Don't assume tramp_vectors is the start of the vectors stable inclusion from stable-v4.19.236 commit af484e69b5e83095609d8b5c8abaf13a5460229e category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit ed50da7764535f1e24432ded289974f2bf2b0c5a upstream. The tramp_ventry macro uses tramp_vectors as the address of the vectors when calculating which ventry in the 'full fat' vectors to branch to. While there is one set of tramp_vectors, this will be true. Adding multiple sets of vectors will break this assumption. Move the generation of the vectors to a macro, and pass the start of the vectors as an argument to tramp_ventry. Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/kernel/entry.S | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 15e117d7e290..1dc27e6503e0 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1075,7 +1075,7 @@ alternative_else_nop_endif sub \dst, \dst, PAGE_SIZE .endm - .macro tramp_ventry, regsize = 64 + .macro tramp_ventry, vector_start, regsize .align 7 1: .if \regsize == 64 @@ -1097,9 +1097,9 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 #else ldr x30, =vectors #endif - prfm plil1strm, [x30, #(1b - tramp_vectors)] + prfm plil1strm, [x30, #(1b - \vector_start)] msr vbar_el1, x30 - add x30, x30, #(1b - tramp_vectors + 4) + add x30, x30, #(1b - \vector_start + 4) isb ret .org 1b + 128 // Did we overflow the ventry slot? 
@@ -1117,19 +1117,21 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 eret .endm - .align 11 -ENTRY(tramp_vectors) + .macro generate_tramp_vector +.Lvector_start\@: .space 0x400 - tramp_ventry - tramp_ventry - tramp_ventry - tramp_ventry + .rept 4 + tramp_ventry .Lvector_start\@, 64 + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32 + .endr + .endm - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 - tramp_ventry 32 + .align 11 +ENTRY(tramp_vectors) + generate_tramp_vector END(tramp_vectors) ENTRY(tramp_exit_native) -- Gitee From 8b3358cfec8da4ac2837cdf5a7dc4ff9c05ac98a Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:37 +0000 Subject: [PATCH 245/340] arm64: entry: Move trampoline macros out of ifdef'd section stable inclusion from stable-v4.19.236 commit f689fa53bb944873f75fe1584f446cae1aabd2c1 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 13d7a08352a83ef2252aeb464a5e08dfc06b5dfd upstream. The macros for building the kpti trampoline are all behind CONFIG_UNMAP_KERNEL_AT_EL0, and in a region that outputs to the .entry.tramp.text section. Move the macros out so they can be used to generate other kinds of trampoline. Only the symbols need to be guarded by CONFIG_UNMAP_KERNEL_AT_EL0 and appear in the .entry.tramp.text section. Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/kernel/entry.S | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 1dc27e6503e0..f9a89b146600 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1033,12 +1033,7 @@ ENDPROC(el0_svc) .popsection // .entry.text -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -/* - * Exception vectors trampoline. - */ - .pushsection ".entry.tramp.text", "ax" - + // Move from tramp_pg_dir to swapper_pg_dir .macro tramp_map_kernel, tmp mrs \tmp, ttbr1_el1 add \tmp, \tmp, #(PAGE_SIZE + RESERVED_TTBR0_SIZE) @@ -1129,6 +1124,11 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 .endr .endm +#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 +/* + * Exception vectors trampoline. + */ + .pushsection ".entry.tramp.text", "ax" .align 11 ENTRY(tramp_vectors) generate_tramp_vector -- Gitee From dc80b5a135110ad0ae99c2bf01b653d1d907feb2 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:38 +0000 Subject: [PATCH 246/340] arm64: entry: Make the kpti trampoline's kpti sequence optional stable inclusion from stable-v4.19.236 commit 9e056623dfc538909ed2a914f70a66d68ec71ec3 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit c47e4d04ba0f1ea17353d85d45f611277507e07a upstream. Spectre-BHB needs to add sequences to the vectors. Having one global set of vectors is a problem for big/little systems where the sequence is costly on cpus that are not vulnerable. Making the vectors per-cpu in the style of KVM's bh_harden_hyp_vecs requires the vectors to be generated by macros. Make the kpti re-mapping of the kernel optional, so the macros can be used without kpti. 
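As a loose C analogue (hypothetical, not kernel code), the pattern is a single generator macro that stamps out entry variants, with a flag argument deciding whether the kpti step is emitted at all:

  #include <stdio.h>

  #define GENERATE_VENTRY(name, kpti)                               \
      static void name(void)                                        \
      {                                                             \
          if (kpti)                                                 \
              puts(#name ": remap kernel (kpti step)");             \
          puts(#name ": branch to the canonical vectors");          \
      }

  GENERATE_VENTRY(tramp_ventry_kpti, 1)   /* kpti trampoline flavour */
  GENERATE_VENTRY(el1_ventry_nokpti, 0)   /* reused later without kpti */

  int main(void)
  {
      tramp_ventry_kpti();
      el1_ventry_nokpti();
      return 0;
  }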
Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/kernel/entry.S | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f9a89b146600..8500997474c4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1070,9 +1070,10 @@ alternative_else_nop_endif sub \dst, \dst, PAGE_SIZE .endm - .macro tramp_ventry, vector_start, regsize + .macro tramp_ventry, vector_start, regsize, kpti .align 7 1: + .if \kpti == 1 .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif @@ -1094,8 +1095,12 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 #endif prfm plil1strm, [x30, #(1b - \vector_start)] msr vbar_el1, x30 - add x30, x30, #(1b - \vector_start + 4) isb + .else + ldr x30, =vectors + .endif // \kpti == 1 + + add x30, x30, #(1b - \vector_start + 4) ret .org 1b + 128 // Did we overflow the ventry slot? .endm @@ -1112,15 +1117,15 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 eret .endm - .macro generate_tramp_vector + .macro generate_tramp_vector, kpti .Lvector_start\@: .space 0x400 .rept 4 - tramp_ventry .Lvector_start\@, 64 + tramp_ventry .Lvector_start\@, 64, \kpti .endr .rept 4 - tramp_ventry .Lvector_start\@, 32 + tramp_ventry .Lvector_start\@, 32, \kpti .endr .endm @@ -1131,7 +1136,7 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 .pushsection ".entry.tramp.text", "ax" .align 11 ENTRY(tramp_vectors) - generate_tramp_vector + generate_tramp_vector kpti=1 END(tramp_vectors) ENTRY(tramp_exit_native) -- Gitee From 6c8a2f25a1c12217d8e1cdc3ee7320e628fc4013 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:39 +0000 Subject: [PATCH 247/340] arm64: entry: Allow the trampoline text to occupy multiple pages stable inclusion from stable-v4.19.236 commit 22fdfcf1c2cea8e6dc383d46cbbe59d476d24a96 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit a9c406e6462ff14956d690de7bbe5131a5677dc9 upstream. Adding a second set of vectors to .entry.tramp.text will make it larger than a single 4K page. Allow the trampoline text to occupy up to three pages by adding two more fixmap slots. Previous changes to tramp_valias allowed it to reach beyond a single page. 
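The mapping loop this patch adds walks the trampoline text one page at a time, giving each page its own fixmap slot counting down from FIX_ENTRY_TRAMP_TEXT1. A simplified standalone model of that loop (page size, addresses and slot numbers are faked here):

  #include <stdio.h>
  #include <stddef.h>

  #define PAGE_SIZE           4096UL
  #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))

  static void set_fixmap_stub(int slot, unsigned long pa)
  {
      printf("fixmap slot %d -> pa %#lx\n", slot, pa);
  }

  int main(void)
  {
      unsigned long pa_start = 0x40080000UL;   /* made-up physical address */
      size_t tramp_text_size = 9000;           /* needs three pages */
      int fix_entry_tramp_text1 = 10;          /* arbitrary slot number */
      size_t i;

      for (i = 0; i < DIV_ROUND_UP(tramp_text_size, PAGE_SIZE); i++)
          set_fixmap_stub(fix_entry_tramp_text1 - (int)i,
                          pa_start + i * PAGE_SIZE);
      return 0;
  }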
Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/include/asm/fixmap.h | 6 ++++-- arch/arm64/include/asm/sections.h | 5 +++++ arch/arm64/kernel/entry.S | 2 +- arch/arm64/kernel/vmlinux.lds.S | 2 +- arch/arm64/mm/mmu.c | 12 +++++++++--- 5 files changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/fixmap.h b/arch/arm64/include/asm/fixmap.h index c0cfc6d3bf9f..3c962ef081f8 100644 --- a/arch/arm64/include/asm/fixmap.h +++ b/arch/arm64/include/asm/fixmap.h @@ -59,9 +59,11 @@ enum fixed_addresses { #endif /* CONFIG_ACPI_APEI_GHES */ #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 - FIX_ENTRY_TRAMP_TEXT, + FIX_ENTRY_TRAMP_TEXT3, + FIX_ENTRY_TRAMP_TEXT2, + FIX_ENTRY_TRAMP_TEXT1, FIX_ENTRY_TRAMP_DATA, -#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT)) +#define TRAMP_VALIAS (__fix_to_virt(FIX_ENTRY_TRAMP_TEXT1)) #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ __end_of_permanent_fixed_addresses, diff --git a/arch/arm64/include/asm/sections.h b/arch/arm64/include/asm/sections.h index caab039d6305..8d3f1eab58e0 100644 --- a/arch/arm64/include/asm/sections.h +++ b/arch/arm64/include/asm/sections.h @@ -30,4 +30,9 @@ extern char __irqentry_text_start[], __irqentry_text_end[]; extern char __mmuoff_data_start[], __mmuoff_data_end[]; extern char __entry_tramp_text_start[], __entry_tramp_text_end[]; +static inline size_t entry_tramp_text_size(void) +{ + return __entry_tramp_text_end - __entry_tramp_text_start; +} + #endif /* __ASM_SECTIONS_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 8500997474c4..2c34f5d9db0a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1066,7 +1066,7 @@ alternative_else_nop_endif .endm .macro tramp_data_page dst - adr \dst, .entry.tramp.text + adr_l \dst, .entry.tramp.text sub \dst, \dst, PAGE_SIZE .endm diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index d6050c6e65bc..2b965dd67881 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -251,7 +251,7 @@ ASSERT(__hibernate_exit_text_end - (__hibernate_exit_text_start & ~(SZ_4K - 1)) <= SZ_4K, "Hibernate exit text too big or misaligned") #endif #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 -ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) == PAGE_SIZE, +ASSERT((__entry_tramp_text_end - __entry_tramp_text_start) <= 3*PAGE_SIZE, "Entry trampoline text too big") #endif /* diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 300972b01939..fa53bee52f87 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -541,6 +541,8 @@ early_param("rodata", parse_rodata); #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 static int __init map_entry_trampoline(void) { + int i; + pgprot_t prot = rodata_enabled ? 
PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; phys_addr_t pa_start = __pa_symbol(__entry_tramp_text_start); @@ -549,11 +551,15 @@ static int __init map_entry_trampoline(void) /* Map only the text into the trampoline page table */ memset(tramp_pg_dir, 0, PGD_SIZE); - __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, PAGE_SIZE, - prot, pgd_pgtable_alloc, 0); + __create_pgd_mapping(tramp_pg_dir, pa_start, TRAMP_VALIAS, + entry_tramp_text_size(), prot, pgd_pgtable_alloc, + 0); /* Map both the text and data into the kernel page table */ - __set_fixmap(FIX_ENTRY_TRAMP_TEXT, pa_start, prot); + for (i = 0; i < DIV_ROUND_UP(entry_tramp_text_size(), PAGE_SIZE); i++) + __set_fixmap(FIX_ENTRY_TRAMP_TEXT1 - i, + pa_start + i * PAGE_SIZE, prot); + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { extern char __entry_tramp_data_start[]; -- Gitee From 4a7a38c5266e2d0dd02b81beb4b6de1d658893ec Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:40 +0000 Subject: [PATCH 248/340] arm64: entry: Add non-kpti __bp_harden_el1_vectors for mitigations stable inclusion from stable-v4.19.236 commit 901c0a20aa94d09a9328899e2dd69a8d43a3a920 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit aff65393fa1401e034656e349abd655cfe272de0 upstream. kpti is an optional feature, for systems not using kpti a set of vectors for the spectre-bhb mitigations is needed. Add another set of vectors, __bp_harden_el1_vectors, that will be used if a mitigation is needed and kpti is not in use. The EL1 ventries are repeated verbatim as there is no additional work needed for entry from EL1. Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/kernel/entry.S | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 2c34f5d9db0a..6f787ae3bb56 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1073,10 +1073,11 @@ alternative_else_nop_endif .macro tramp_ventry, vector_start, regsize, kpti .align 7 1: - .if \kpti == 1 .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif + + .if \kpti == 1 /* * Defend against branch aliasing attacks by pushing a dummy * entry onto the return stack and using a RET instruction to @@ -1159,6 +1160,38 @@ __entry_tramp_data_start: #endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ +/* + * Exception vectors for spectre mitigations on entry from EL1 when + * kpti is not in use. + */ + .macro generate_el1_vector +.Lvector_start\@: + kernel_ventry 1, sync_invalid // Synchronous EL1t + kernel_ventry 1, irq_invalid // IRQ EL1t + kernel_ventry 1, fiq_invalid // FIQ EL1t + kernel_ventry 1, error_invalid // Error EL1t + + kernel_ventry 1, sync // Synchronous EL1h + kernel_ventry 1, irq // IRQ EL1h + kernel_ventry 1, fiq_invalid // FIQ EL1h + kernel_ventry 1, error // Error EL1h + + .rept 4 + tramp_ventry .Lvector_start\@, 64, kpti=0 + .endr + .rept 4 + tramp_ventry .Lvector_start\@, 32, kpti=0 + .endr + .endm + + .pushsection ".entry.text", "ax" + .align 11 +ENTRY(__bp_harden_el1_vectors) + generate_el1_vector +END(__bp_harden_el1_vectors) + .popsection + + /* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. 
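The shape being generated is the usual sixteen-slot arm64 vector table; only the eight EL0 slots route through the trampoline machinery, while the EL1 slots stay as plain kernel entries. An illustrative C sketch of that layout (not kernel code, handler names invented):

  typedef void (*ventry_t)(void);

  static void kernel_ventry_el1(void)   { /* ordinary EL1 entry, unchanged */ }
  static void tramp_ventry_el0_64(void) { /* 64-bit EL0 entry via trampoline */ }
  static void tramp_ventry_el0_32(void) { /* 32-bit EL0 entry via trampoline */ }

  static const ventry_t bp_harden_el1_vectors[16] = {
      /* four EL1t then four EL1h slots: repeated verbatim */
      kernel_ventry_el1, kernel_ventry_el1, kernel_ventry_el1, kernel_ventry_el1,
      kernel_ventry_el1, kernel_ventry_el1, kernel_ventry_el1, kernel_ventry_el1,
      /* four 64-bit EL0 slots, then four 32-bit EL0 slots */
      tramp_ventry_el0_64, tramp_ventry_el0_64, tramp_ventry_el0_64, tramp_ventry_el0_64,
      tramp_ventry_el0_32, tramp_ventry_el0_32, tramp_ventry_el0_32, tramp_ventry_el0_32,
  };

  int main(void)
  {
      for (int i = 0; i < 16; i++)
          bp_harden_el1_vectors[i]();
      return 0;
  }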
On entry: -- Gitee From fef55b74167e2d00dd5027df583967d205fdf543 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:41 +0000 Subject: [PATCH 249/340] arm64: entry: Add vectors that have the bhb mitigation sequences stable inclusion from stable-v4.19.236 commit 91429ed04ebe9dbec88f97c6fd136b722bc3f3c5 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit ba2689234be92024e5635d30fe744f4853ad97db upstream. Some CPUs affected by Spectre-BHB need a sequence of branches, or a firmware call to be run before any indirect branch. This needs to go in the vectors. No CPU needs both. While this can be patched in, it would run on all CPUs as there is a single set of vectors. If only one part of a big/little combination is affected, the unaffected CPUs have to run the mitigation too. Create extra vectors that include the sequence. Subsequent patches will allow affected CPUs to select this set of vectors. Later patches will modify the loop count to match what the CPU requires. Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/include/asm/assembler.h | 25 ++++++++++++++ arch/arm64/include/asm/vectors.h | 34 +++++++++++++++++++ arch/arm64/kernel/entry.S | 53 +++++++++++++++++++++++++----- include/linux/arm-smccc.h | 7 ++++ 4 files changed, 110 insertions(+), 9 deletions(-) create mode 100644 arch/arm64/include/asm/vectors.h diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index becb5454151e..3fbe273e84ad 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -703,4 +703,29 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .Lyield_out_\@ : .endm + .macro __mitigate_spectre_bhb_loop tmp +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + mov \tmp, #32 +.Lspectre_bhb_loop\@: + b . + 4 + subs \tmp, \tmp, #1 + b.ne .Lspectre_bhb_loop\@ + dsb nsh + isb +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm + + /* Save/restores x0-x3 to the stack */ + .macro __mitigate_spectre_bhb_fw +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + stp x0, x1, [sp, #-16]! + stp x2, x3, [sp, #-16]! + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 +alternative_cb arm64_update_smccc_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end + ldp x2, x3, [sp], #16 + ldp x0, x1, [sp], #16 +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + .endm #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h new file mode 100644 index 000000000000..16ca74260375 --- /dev/null +++ b/arch/arm64/include/asm/vectors.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2022 ARM Ltd. + */ +#ifndef __ASM_VECTORS_H +#define __ASM_VECTORS_H + +/* + * Note: the order of this enum corresponds to two arrays in entry.S: + * tramp_vecs and __bp_harden_el1_vectors. By default the canonical + * 'full fat' vectors are used directly. + */ +enum arm64_bp_harden_el1_vectors { +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + /* + * Perform the BHB loop mitigation, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_LOOP, + + /* + * Make the SMC call for firmware mitigation, before branching to the + * canonical vectors. 
+ */ + EL1_VECTOR_BHB_FW, +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + + /* + * Remap the kernel before branching to the canonical vectors. + */ + EL1_VECTOR_KPTI, +}; + +#endif /* __ASM_VECTORS_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 6f787ae3bb56..07670da037be 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1070,13 +1070,26 @@ alternative_else_nop_endif sub \dst, \dst, PAGE_SIZE .endm - .macro tramp_ventry, vector_start, regsize, kpti + +#define BHB_MITIGATION_NONE 0 +#define BHB_MITIGATION_LOOP 1 +#define BHB_MITIGATION_FW 2 + + .macro tramp_ventry, vector_start, regsize, kpti, bhb .align 7 1: .if \regsize == 64 msr tpidrro_el0, x30 // Restored in kernel_ventry .endif + .if \bhb == BHB_MITIGATION_LOOP + /* + * This sequence must appear before the first indirect branch. i.e. the + * ret out of tramp_ventry. It appears here because x30 is free. + */ + __mitigate_spectre_bhb_loop x30 + .endif // \bhb == BHB_MITIGATION_LOOP + .if \kpti == 1 /* * Defend against branch aliasing attacks by pushing a dummy @@ -1101,6 +1114,15 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 ldr x30, =vectors .endif // \kpti == 1 + .if \bhb == BHB_MITIGATION_FW + /* + * The firmware sequence must appear before the first indirect branch. + * i.e. the ret out of tramp_ventry. But it also needs the stack to be + * mapped to save/restore the registers the SMC clobbers. + */ + __mitigate_spectre_bhb_fw + .endif // \bhb == BHB_MITIGATION_FW + add x30, x30, #(1b - \vector_start + 4) ret .org 1b + 128 // Did we overflow the ventry slot? @@ -1108,6 +1130,9 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 .macro tramp_exit, regsize = 64 adr x30, tramp_vectors +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + add x30, x30, SZ_4K +#endif msr vbar_el1, x30 ldr lr, [sp, #S_LR] tramp_unmap_kernel x29 @@ -1118,26 +1143,32 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 eret .endm - .macro generate_tramp_vector, kpti + .macro generate_tramp_vector, kpti, bhb .Lvector_start\@: .space 0x400 .rept 4 - tramp_ventry .Lvector_start\@, 64, \kpti + tramp_ventry .Lvector_start\@, 64, \kpti, \bhb .endr .rept 4 - tramp_ventry .Lvector_start\@, 32, \kpti + tramp_ventry .Lvector_start\@, 32, \kpti, \bhb .endr .endm #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 /* * Exception vectors trampoline. + * The order must match __bp_harden_el1_vectors and the + * arm64_bp_harden_el1_vectors enum. */ .pushsection ".entry.tramp.text", "ax" .align 11 ENTRY(tramp_vectors) - generate_tramp_vector kpti=1 +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE END(tramp_vectors) ENTRY(tramp_exit_native) @@ -1164,7 +1195,7 @@ __entry_tramp_data_start: * Exception vectors for spectre mitigations on entry from EL1 when * kpti is not in use. 
*/ - .macro generate_el1_vector + .macro generate_el1_vector, bhb .Lvector_start\@: kernel_ventry 1, sync_invalid // Synchronous EL1t kernel_ventry 1, irq_invalid // IRQ EL1t @@ -1177,17 +1208,21 @@ __entry_tramp_data_start: kernel_ventry 1, error // Error EL1h .rept 4 - tramp_ventry .Lvector_start\@, 64, kpti=0 + tramp_ventry .Lvector_start\@, 64, 0, \bhb .endr .rept 4 - tramp_ventry .Lvector_start\@, 32, kpti=0 + tramp_ventry .Lvector_start\@, 32, 0, \bhb .endr .endm +/* The order must match tramp_vecs and the arm64_bp_harden_el1_vectors enum. */ .pushsection ".entry.text", "ax" .align 11 ENTRY(__bp_harden_el1_vectors) - generate_el1_vector +#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY + generate_el1_vector bhb=BHB_MITIGATION_LOOP + generate_el1_vector bhb=BHB_MITIGATION_FW +#endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ END(__bp_harden_el1_vectors) .popsection diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index a378a1d53fb9..8d3eb27e6890 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -86,6 +86,13 @@ ARM_SMCCC_SMC_32, \ 0, 0x7fff) +#define ARM_SMCCC_ARCH_WORKAROUND_3 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x3fff) + +#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED 1 + #ifndef __ASSEMBLY__ #include -- Gitee From a68ead8779f9ac6b092949a9e0d7d5eee7337e21 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:42 +0000 Subject: [PATCH 250/340] arm64: entry: Add macro for reading symbol addresses from the trampoline stable inclusion from stable-v4.19.236 commit e18876b523d5f5fd8b8f34721f60a470caf20aa1 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit b28a8eebe81c186fdb1a0078263b30576c8e1f42 upstream. The trampoline code needs to use the address of symbols in the wider kernel, e.g. vectors. PC-relative addressing wouldn't work as the trampoline code doesn't run at the address the linker expected. tramp_ventry uses a literal pool, unless CONFIG_RANDOMIZE_BASE is set, in which case it uses the data page as a literal pool because the data page can be unmapped when running in user-space, which is required for CPUs vulnerable to meltdown. Pull this logic out as a macro, instead of adding a third copy of it. Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/kernel/entry.S | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 07670da037be..ceef7427e412 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1070,6 +1070,15 @@ alternative_else_nop_endif sub \dst, \dst, PAGE_SIZE .endm + .macro tramp_data_read_var dst, var +#ifdef CONFIG_RANDOMIZE_BASE + tramp_data_page \dst + add \dst, \dst, #:lo12:__entry_tramp_data_\var + ldr \dst, [\dst] +#else + ldr \dst, =\var +#endif + .endm #define BHB_MITIGATION_NONE 0 #define BHB_MITIGATION_LOOP 1 @@ -1100,13 +1109,8 @@ alternative_else_nop_endif b . 
2: tramp_map_kernel x30 -#ifdef CONFIG_RANDOMIZE_BASE - tramp_data_page x30 alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 - ldr x30, [x30] -#else - ldr x30, =vectors -#endif + tramp_data_read_var x30, vectors prfm plil1strm, [x30, #(1b - \vector_start)] msr vbar_el1, x30 isb @@ -1186,7 +1190,12 @@ END(tramp_exit_compat) .align PAGE_SHIFT .globl __entry_tramp_data_start __entry_tramp_data_start: +__entry_tramp_data_vectors: .quad vectors +#ifdef CONFIG_ARM_SDE_INTERFACE +__entry_tramp_data___sdei_asm_handler: + .quad __sdei_asm_handler +#endif /* CONFIG_ARM_SDE_INTERFACE */ .popsection // .rodata #endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ @@ -1313,13 +1322,7 @@ ENTRY(__sdei_asm_entry_trampoline) */ 1: str x4, [x1, #(SDEI_EVENT_INTREGS + S_ORIG_ADDR_LIMIT)] -#ifdef CONFIG_RANDOMIZE_BASE - tramp_data_page x4 - add x4, x4, #:lo12:__sdei_asm_trampoline_next_handler - ldr x4, [x4] -#else - ldr x4, =__sdei_asm_handler -#endif + tramp_data_read_var x4, __sdei_asm_handler br x4 ENDPROC(__sdei_asm_entry_trampoline) NOKPROBE(__sdei_asm_entry_trampoline) @@ -1342,12 +1345,6 @@ ENDPROC(__sdei_asm_exit_trampoline) NOKPROBE(__sdei_asm_exit_trampoline) .ltorg .popsection // .entry.tramp.text -#ifdef CONFIG_RANDOMIZE_BASE -.pushsection ".rodata", "a" -__sdei_asm_trampoline_next_handler: - .quad __sdei_asm_handler -.popsection // .rodata -#endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ /* -- Gitee From e829d824f4fa3f1e0aea8e02d309868c4f419334 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:43 +0000 Subject: [PATCH 251/340] arm64: Add percpu vectors for EL1 stable inclusion from stable-v4.19.236 commit 5b5ca2608fbd6f250281b6a1d0d73613f250e6f1 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit bd09128d16fac3c34b80bd6a29088ac632e8ce09 upstream. The Spectre-BHB workaround adds a firmware call to the vectors. This is needed on some CPUs, but not others. To avoid the unaffected CPU in a big/little pair from making the firmware call, create per cpu vectors. The per-cpu vectors only apply when returning from EL0. Systems using KPTI can use the canonical 'full-fat' vectors directly at EL1, the trampoline exit code will switch to this_cpu_vector on exit to EL0. Systems not using KPTI should always use this_cpu_vector. this_cpu_vector will point at a vector in tramp_vecs or __bp_harden_el1_vectors, depending on whether KPTI is in use. 
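For illustration only, not part of the patch: the per-CPU choice reduces to roughly the
following stand-alone model. Here kpti_enabled, tramp_base and el1_base are invented
stand-ins for arm64_kernel_unmapped_at_el0(), the TRAMP_VALIAS mapping and
__bp_harden_el1_vectors, and SLOT_SIZE mirrors the SZ_2K stride between vector copies.

  #include <stdio.h>
  #include <stdbool.h>
  #include <stddef.h>

  #define SLOT_SIZE 0x800                 /* SZ_2K: each vector copy is 2KiB */

  enum slot { SLOT_BHB_LOOP, SLOT_BHB_FW, SLOT_KPTI, NR_SLOTS };

  static bool kpti_enabled;                      /* models arm64_kernel_unmapped_at_el0() */
  static char tramp_base[NR_SLOTS * SLOT_SIZE];  /* models the TRAMP_VALIAS mapping */
  static char el1_base[NR_SLOTS * SLOT_SIZE];    /* models __bp_harden_el1_vectors */

  /* What this_cpu_vector ends up pointing at for a given slot. */
  static const char *pick_vector(enum slot slot)
  {
      if (kpti_enabled)
          return tramp_base + SLOT_SIZE * slot;  /* exit to EL0 via the trampoline copy */
      return el1_base + SLOT_SIZE * slot;        /* use the EL1 copy directly */
  }

  int main(void)
  {
      kpti_enabled = true;
      printf("kpti on:  KPTI slot at trampoline offset %zu\n",
             (size_t)(pick_vector(SLOT_KPTI) - tramp_base));

      kpti_enabled = false;
      printf("kpti off: FW slot at __bp_harden offset %zu\n",
             (size_t)(pick_vector(SLOT_BHB_FW) - el1_base));
      return 0;
  }

The kernel-side equivalent is arm64_get_bp_hardening_vector() in the vectors.h hunk below;
the model only shows the KPTI/non-KPTI split and the 2KiB slot arithmetic.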
Reviewed-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Conflicts: arch/arm64/kernel/cpufeature.c arch/arm64/kvm/hyp/switch.c Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/include/asm/mmu.h | 2 +- arch/arm64/include/asm/vectors.h | 27 +++++++++++++++++++++++++++ arch/arm64/kernel/cpufeature.c | 11 +++++++++++ arch/arm64/kernel/entry.S | 16 ++++++++++------ arch/arm64/kvm/hyp/switch.c | 8 ++++++-- 5 files changed, 55 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 3e02efc9cdd0..d6d47fa7623d 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -50,7 +50,7 @@ extern int res_mem_count; */ #define ASID(mm) ((mm)->context.id.counter & 0xffff) -static inline bool arm64_kernel_unmapped_at_el0(void) +static __always_inline bool arm64_kernel_unmapped_at_el0(void) { return IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0) && cpus_have_const_cap(ARM64_UNMAP_KERNEL_AT_EL0); diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h index 16ca74260375..3f76dfd9e074 100644 --- a/arch/arm64/include/asm/vectors.h +++ b/arch/arm64/include/asm/vectors.h @@ -5,6 +5,15 @@ #ifndef __ASM_VECTORS_H #define __ASM_VECTORS_H +#include +#include + +#include + +extern char vectors[]; +extern char tramp_vectors[]; +extern char __bp_harden_el1_vectors[]; + /* * Note: the order of this enum corresponds to two arrays in entry.S: * tramp_vecs and __bp_harden_el1_vectors. By default the canonical @@ -31,4 +40,22 @@ enum arm64_bp_harden_el1_vectors { EL1_VECTOR_KPTI, }; +/* The vectors to use on return from EL0. e.g. to remap the kernel */ +DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector); + +#ifndef CONFIG_UNMAP_KERNEL_AT_EL0 +#define TRAMP_VALIAS 0 +#endif + +static inline const char * +arm64_get_bp_hardening_vector(enum arm64_bp_harden_el1_vectors slot) +{ + if (arm64_kernel_unmapped_at_el0()) + return (char *)TRAMP_VALIAS + SZ_2K * slot; + + WARN_ON_ONCE(slot == EL1_VECTOR_KPTI); + + return __bp_harden_el1_vectors + SZ_2K * slot; +} + #endif /* __ASM_VECTORS_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 1bf9d84265de..f25f0dfed813 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -20,11 +20,13 @@ #include #include +#include #include #include #include #include #include + #include #include #include @@ -33,6 +35,7 @@ #include #include #include +#include #include unsigned long elf_hwcap __read_mostly; @@ -54,6 +57,8 @@ EXPORT_SYMBOL(cpu_hwcaps); /* Need also bit for ARM64_CB_PATCH */ DECLARE_BITMAP(boot_capabilities, ARM64_NPATCHABLE); +DEFINE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector) = vectors; + /* * Flag to indicate if we have computed the system wide * capabilities based on the boot time active CPUs. 
This @@ -1038,6 +1043,12 @@ kpti_install_ng_mappings(const struct arm64_cpu_capabilities *__unused) static bool kpti_applied = false; int cpu = smp_processor_id(); + if (__this_cpu_read(this_cpu_vector) == vectors) { + const char *v = arm64_get_bp_hardening_vector(EL1_VECTOR_KPTI); + + __this_cpu_write(this_cpu_vector, v); + } + if (kpti_applied) return; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ceef7427e412..65f66020985a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -71,7 +71,6 @@ .macro kernel_ventry, el, label, regsize = 64 .align 7 .Lventry_start\@: -#ifdef CONFIG_UNMAP_KERNEL_AT_EL0 .if \el == 0 /* * This must be the first instruction of the EL0 vector entries. It is @@ -86,7 +85,6 @@ .endif .Lskip_tramp_vectors_cleanup\@: .endif -#endif sub sp, sp, #S_FRAME_SIZE #ifdef CONFIG_VMAP_STACK @@ -1133,10 +1131,14 @@ alternative_insn isb, nop, ARM64_WORKAROUND_QCOM_FALKOR_E1003 .endm .macro tramp_exit, regsize = 64 - adr x30, tramp_vectors -#ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY - add x30, x30, SZ_4K -#endif + tramp_data_read_var x30, this_cpu_vector +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN + mrs x29, tpidr_el1 +alternative_else + mrs x29, tpidr_el2 +alternative_endif + ldr x30, [x30, x29] + msr vbar_el1, x30 ldr lr, [sp, #S_LR] tramp_unmap_kernel x29 @@ -1196,6 +1198,8 @@ __entry_tramp_data_vectors: __entry_tramp_data___sdei_asm_handler: .quad __sdei_asm_handler #endif /* CONFIG_ARM_SDE_INTERFACE */ +__entry_tramp_data_this_cpu_vector: + .quad this_cpu_vector .popsection // .rodata #endif /* CONFIG_RANDOMIZE_BASE */ #endif /* CONFIG_UNMAP_KERNEL_AT_EL0 */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index ae473600cb97..6473c7c96b14 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -35,6 +35,7 @@ #include #include #include +#include /* Check whether the FP regs were dirtied while in the host-side run loop: */ static bool __hyp_text update_fp_enabled(struct kvm_vcpu *vcpu) @@ -153,10 +154,13 @@ static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) static void deactivate_traps_vhe(void) { - extern char vectors[]; /* kernel exception vectors */ + const char *host_vectors = vectors; write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); - write_sysreg(vectors, vbar_el1); + + if (!arm64_kernel_unmapped_at_el0()) + host_vectors = __this_cpu_read(this_cpu_vector); + write_sysreg(host_vectors, vbar_el1); } NOKPROBE_SYMBOL(deactivate_traps_vhe); -- Gitee From 39da68e72aa79e494eb74eb94163f75ca2075d52 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:44 +0000 Subject: [PATCH 252/340] arm64: proton-pack: Report Spectre-BHB vulnerabilities as part of Spectre-v2 stable inclusion from stable-v4.19.236 commit 7b012f6597e55a2ea4c7efe94b5d9a792b6e5757 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit dee435be76f4117410bbd90573a881fd33488f37 upstream. Speculation attacks against some high-performance processors can make use of branch history to influence future speculation as part of a spectre-v2 attack. This is not mitigated by CSV2, meaning CPUs that previously reported 'Not affected' are now moderately mitigated by CSV2. Update the value in /sys/devices/system/cpu/vulnerabilities/spectre_v2 to also show the state of the BHB mitigation. 
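To illustrate the strings this produces (illustrative only, not part of the patch: the
helper names below are invented for the sketch, and the two bool parameters model the
__spectrev2_safe and __hardenbp_enab flags used in the hunk):

  #include <stdio.h>
  #include <stdbool.h>

  enum mitigation_state { SPECTRE_UNAFFECTED, SPECTRE_MITIGATED, SPECTRE_VULNERABLE };

  static const char *bhb_suffix(enum mitigation_state bhb)
  {
      switch (bhb) {
      case SPECTRE_UNAFFECTED: return "";
      case SPECTRE_MITIGATED:  return ", BHB";
      default:                 return ", but not BHB";
      }
  }

  static void show(bool spectrev2_safe, bool hardenbp, enum mitigation_state bhb)
  {
      const char *v2 = "Branch predictor hardening";

      if (spectrev2_safe) {
          if (bhb == SPECTRE_UNAFFECTED) {
              puts("Not affected");
              return;
          }
          /* Affected by Spectre-BHB, so "Not affected" can no longer be reported. */
          v2 = "CSV2";
      }
      if (hardenbp)
          printf("Mitigation: %s%s\n", v2, bhb_suffix(bhb));
      else
          puts("Vulnerable");
  }

  int main(void)
  {
      show(true,  false, SPECTRE_UNAFFECTED);  /* Not affected */
      show(false, true,  SPECTRE_MITIGATED);   /* Mitigation: Branch predictor hardening, BHB */
      show(false, true,  SPECTRE_VULNERABLE);  /* Mitigation: Branch predictor hardening, but not BHB */
      show(false, false, SPECTRE_MITIGATED);   /* Vulnerable */
      return 0;
  }

The kernel-side logic is in the cpu_errata.c hunk below; the sketch only mirrors how the
BHB suffix is appended to the existing spectre_v2 string.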
Reviewed-by: Catalin Marinas [ code move to cpu_errata.c for backport ] Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Conflicts: arch/arm64/include/asm/cpufeature.h Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/include/asm/cpufeature.h | 9 +++++++ arch/arm64/kernel/cpu_errata.c | 38 ++++++++++++++++++++++++++--- 2 files changed, 44 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index c43d02365fb6..0833197ea5c1 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -549,6 +549,15 @@ static inline int arm64_get_ssbd_state(void) void arm64_set_ssbd_mitigation(bool state); +/* Watch out, ordering is important here. */ +enum mitigation_state { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +enum mitigation_state arm64_get_spectre_bhb_state(void); + static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) { switch (parange) { diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a6f2451b4469..b25072519102 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -957,14 +957,39 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, return sprintf(buf, "Mitigation: __user pointer sanitization\n"); } +static const char *get_bhb_affected_string(enum mitigation_state bhb_state) +{ + switch (bhb_state) { + case SPECTRE_UNAFFECTED: + return ""; + default: + case SPECTRE_VULNERABLE: + return ", but not BHB"; + case SPECTRE_MITIGATED: + return ", BHB"; + } +} + ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - if (__spectrev2_safe) - return sprintf(buf, "Not affected\n"); + enum mitigation_state bhb_state = arm64_get_spectre_bhb_state(); + const char *bhb_str = get_bhb_affected_string(bhb_state); + const char *v2_str = "Branch predictor hardening"; + + if (__spectrev2_safe) { + if (bhb_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "Not affected\n"); + + /* + * Platforms affected by Spectre-BHB can't report + * "Not affected" for Spectre-v2. + */ + v2_str = "CSV2"; + } if (__hardenbp_enab) - return sprintf(buf, "Mitigation: Branch predictor hardening\n"); + return sprintf(buf, "Mitigation: %s%s\n", v2_str, bhb_str); return sprintf(buf, "Vulnerable\n"); } @@ -985,3 +1010,10 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, return sprintf(buf, "Vulnerable\n"); } + +static enum mitigation_state spectre_bhb_state; + +enum mitigation_state arm64_get_spectre_bhb_state(void) +{ + return spectre_bhb_state; +} -- Gitee From def2df575d30cc95c57ffd46939803f93cae489e Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:45 +0000 Subject: [PATCH 253/340] KVM: arm64: Add templates for BHB mitigation sequences stable inclusion from stable-v4.19.236 commit a68912a3ae3413be5febcaa40e7e0ec1fd62adee category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- KVM writes the Spectre-v2 mitigation template at the beginning of each vector when a CPU requires a specific sequence to run. Because the template is copied, it can not be modified by the alternatives at runtime. As the KVM template code is intertwined with the bp-hardening callbacks, all templates must have a bp-hardening callback. 
Add templates for calling ARCH_WORKAROUND_3 and one for each value of K in the brancy-loop. Identify these sequences by a new parameter template_start, and add a copy of install_bp_hardening_cb() that is able to install them. Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Conflicts: arch/arm64/include/asm/cpucaps.h Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/include/asm/cpucaps.h | 2 + arch/arm64/include/asm/kvm_mmu.h | 6 ++- arch/arm64/include/asm/mmu.h | 6 +++ arch/arm64/kernel/cpu_errata.c | 65 +++++++++++++++++++++++++++++++- arch/arm64/kvm/hyp/hyp-entry.S | 54 ++++++++++++++++++++++++++ 5 files changed, 130 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 69515f9077e3..f6d2ed5341a4 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -64,4 +64,6 @@ #define ARM64_NCAPS 38 #endif +#define ARM64_SPECTRE_BHB 40 + #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 399228f9f254..f8f00c054776 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -488,7 +488,8 @@ static inline void *kvm_get_hyp_vector(void) void *vect = kern_hyp_va(kvm_ksym_ref(__kvm_hyp_vector)); int slot = -1; - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) && data->fn) { + if ((cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) || + cpus_have_const_cap(ARM64_SPECTRE_BHB)) && data->template_start) { vect = kern_hyp_va(kvm_ksym_ref(__bp_harden_hyp_vecs_start)); slot = data->hyp_vectors_slot; } @@ -517,7 +518,8 @@ static inline int kvm_map_vectors(void) * !HBP + HEL2 -> allocate one vector slot and use exec mapping * HBP + HEL2 -> use hardened vertors and use exec mapping */ - if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR)) { + if (cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR) || + cpus_have_const_cap(ARM64_SPECTRE_BHB)) { __kvm_bp_vect_base = kvm_ksym_ref(__bp_harden_hyp_vecs_start); __kvm_bp_vect_base = kern_hyp_va(__kvm_bp_vect_base); } diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index d6d47fa7623d..fac1a43f4ac3 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -61,6 +61,12 @@ typedef void (*bp_hardening_cb_t)(void); struct bp_hardening_data { int hyp_vectors_slot; bp_hardening_cb_t fn; + + /* + * template_start is only used by the BHB mitigation to identify the + * hyp_vectors_slot sequence. 
+ */ + const char *template_start; }; #if (defined(CONFIG_HARDEN_BRANCH_PREDICTOR) || \ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index b25072519102..82ad16b59366 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -147,6 +147,14 @@ DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM_INDIRECT_VECTORS extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; +extern char __smccc_workaround_3_smc_start[]; +extern char __smccc_workaround_3_smc_end[]; +extern char __spectre_bhb_loop_k8_start[]; +extern char __spectre_bhb_loop_k8_end[]; +extern char __spectre_bhb_loop_k24_start[]; +extern char __spectre_bhb_loop_k24_end[]; +extern char __spectre_bhb_loop_k32_start[]; +extern char __spectre_bhb_loop_k32_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -160,11 +168,11 @@ static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, __flush_icache_range((uintptr_t)dst, (uintptr_t)dst + SZ_2K); } +static DEFINE_SPINLOCK(bp_lock); static void install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, const char *hyp_vecs_end) { - static DEFINE_SPINLOCK(bp_lock); int cpu, slot = -1; spin_lock(&bp_lock); @@ -183,6 +191,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); __this_cpu_write(bp_hardening_data.fn, fn); + __this_cpu_write(bp_hardening_data.template_start, hyp_vecs_start); spin_unlock(&bp_lock); } #else @@ -1017,3 +1026,57 @@ enum mitigation_state arm64_get_spectre_bhb_state(void) { return spectre_bhb_state; } + +#ifdef CONFIG_KVM_INDIRECT_VECTORS +static const char *kvm_bhb_get_vecs_end(const char *start) +{ + if (start == __smccc_workaround_3_smc_start) + return __smccc_workaround_3_smc_end; + else if (start == __spectre_bhb_loop_k8_start) + return __spectre_bhb_loop_k8_end; + else if (start == __spectre_bhb_loop_k24_start) + return __spectre_bhb_loop_k24_end; + else if (start == __spectre_bhb_loop_k32_start) + return __spectre_bhb_loop_k32_end; + + return NULL; +} + +void kvm_setup_bhb_slot(const char *hyp_vecs_start) +{ + int cpu, slot = -1; + const char *hyp_vecs_end; + + if (!IS_ENABLED(CONFIG_KVM) || !is_hyp_mode_available()) + return; + + hyp_vecs_end = kvm_bhb_get_vecs_end(hyp_vecs_start); + if (WARN_ON_ONCE(!hyp_vecs_start || !hyp_vecs_end)) + return; + + spin_lock(&bp_lock); + for_each_possible_cpu(cpu) { + if (per_cpu(bp_hardening_data.template_start, cpu) == hyp_vecs_start) { + slot = per_cpu(bp_hardening_data.hyp_vectors_slot, cpu); + break; + } + } + + if (slot == -1) { + slot = atomic_inc_return(&arm64_el2_vector_last_slot); + BUG_ON(slot >= BP_HARDEN_EL2_SLOTS); + __copy_hyp_vect_bpi(slot, hyp_vecs_start, hyp_vecs_end); + } + + __this_cpu_write(bp_hardening_data.hyp_vectors_slot, slot); + __this_cpu_write(bp_hardening_data.template_start, hyp_vecs_start); + spin_unlock(&bp_lock); +} +#else +#define __smccc_workaround_3_smc_start NULL +#define __spectre_bhb_loop_k8_start NULL +#define __spectre_bhb_loop_k24_start NULL +#define __spectre_bhb_loop_k32_start NULL + +void kvm_setup_bhb_slot(const char *hyp_vecs_start) { }; +#endif diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index b1f14f736962..acdf1e51e51c 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -329,4 +329,58 @@ ENTRY(__smccc_workaround_1_smc_start) ldp x0, x1, [sp, #(8 
* 2)] add sp, sp, #(8 * 4) ENTRY(__smccc_workaround_1_smc_end) + +ENTRY(__smccc_workaround_3_smc_start) + esb + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_3 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +ENTRY(__smccc_workaround_3_smc_end) + +ENTRY(__spectre_bhb_loop_k8_start) + esb + sub sp, sp, #(8 * 2) + stp x0, x1, [sp, #(8 * 0)] + mov x0, #8 +2: b . + 4 + subs x0, x0, #1 + b.ne 2b + dsb nsh + isb + ldp x0, x1, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +ENTRY(__spectre_bhb_loop_k8_end) + +ENTRY(__spectre_bhb_loop_k24_start) + esb + sub sp, sp, #(8 * 2) + stp x0, x1, [sp, #(8 * 0)] + mov x0, #24 +2: b . + 4 + subs x0, x0, #1 + b.ne 2b + dsb nsh + isb + ldp x0, x1, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +ENTRY(__spectre_bhb_loop_k24_end) + +ENTRY(__spectre_bhb_loop_k32_start) + esb + sub sp, sp, #(8 * 2) + stp x0, x1, [sp, #(8 * 0)] + mov x0, #32 +2: b . + 4 + subs x0, x0, #1 + b.ne 2b + dsb nsh + isb + ldp x0, x1, [sp, #(8 * 0)] + add sp, sp, #(8 * 2) +ENTRY(__spectre_bhb_loop_k32_end) #endif -- Gitee From cb917077efa4fecd975ab17d6aececdd90abe82e Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:46 +0000 Subject: [PATCH 254/340] arm64: Mitigate spectre style branch history side channels stable inclusion from stable-v4.19.236 commit c20d551744797000c4af993f7d59ef8c69732949 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 558c303c9734af5a813739cd284879227f7297d2 upstream. Speculation attacks against some high-performance processors can make use of branch history to influence future speculation. When taking an exception from user-space, a sequence of branches or a firmware call overwrites or invalidates the branch history. The sequence of branches is added to the vectors, and should appear before the first indirect branch. For systems using KPTI the sequence is added to the kpti trampoline where it has a free register as the exit from the trampoline is via a 'ret'. For systems not using KPTI, the same register tricks are used to free up a register in the vectors. For the firmware call, arch-workaround-3 clobbers 4 registers, so there is no choice but to save them to the EL1 stack. This only happens for entry from EL0, so if we take an exception due to the stack access, it will not become re-entrant. For KVM, the existing branch-predictor-hardening vectors are used. When a spectre version of these vectors is in use, the firmware call is sufficient to mitigate against Spectre-BHB. For the non-spectre versions, the sequence of branches is added to the indirect vector. 
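As a rough stand-alone sketch of the selection order that the new
spectre_bhb_enable_mitigation() follows (not part of the patch: the struct and helpers
here are invented stand-ins for the ID-register, MIDR-list and SMCCC probing, and the
"mitigations off" / "no point mitigating BHB alone" early-outs are omitted):

  #include <stdio.h>
  #include <stdbool.h>

  enum bhb_method { BHB_NONE, BHB_ECBHB, BHB_LOOP, BHB_FW };

  struct cpu_info {
      bool has_ecbhb;  /* ID_AA64MMFR1_EL1.ECBHB: exceptions clear branch history */
      int  loop_k;     /* 8/24/32 if the MIDR is on a "loop mitigated" list */
      bool fw_wa3;     /* firmware implements ARM_SMCCC_ARCH_WORKAROUND_3 */
  };

  /* Same priority order as the kernel-side code added below. */
  static enum bhb_method pick_mitigation(const struct cpu_info *cpu)
  {
      if (cpu->has_ecbhb)
          return BHB_ECBHB;  /* nothing needs adding to the vectors */
      if (cpu->loop_k)
          return BHB_LOOP;   /* branchy loop, K iterations, in the vectors */
      if (cpu->fw_wa3)
          return BHB_FW;     /* WA3 firmware call in the vectors */
      return BHB_NONE;       /* unaffected, or left vulnerable */
  }

  int main(void)
  {
      static const char * const names[] = { "none", "ecbhb", "loop", "firmware" };
      struct cpu_info a76 = { .loop_k = 24 };    /* e.g. Cortex-A76 */
      struct cpu_info a75 = { .fw_wa3 = true };  /* e.g. Cortex-A75 */

      printf("A76-like CPU: %s (K=%d)\n", names[pick_mitigation(&a76)], a76.loop_k);
      printf("A75-like CPU: %s\n", names[pick_mitigation(&a75)]);
      return 0;
  }

The K values and the firmware-mitigated CPUs in the example come from the MIDR lists
added below; the real code additionally installs the matching KVM template and the
per-CPU EL1 vector slot.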
Reviewed-by: Catalin Marinas Cc: # # # Signed-off-by: Greg Kroah-Hartman Conflicts: arch/arm64/Kconfig arch/arm64/include/asm/cpufeature.h arch/arm64/include/asm/cputype.h arch/arm64/kernel/cpu_errata.c Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/Kconfig | 9 + arch/arm64/include/asm/assembler.h | 4 +- arch/arm64/include/asm/cpufeature.h | 18 ++ arch/arm64/include/asm/cputype.h | 9 + arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/include/asm/vectors.h | 6 + arch/arm64/kernel/cpu_errata.c | 263 +++++++++++++++++++++++++++- arch/arm64/kvm/hyp/hyp-entry.S | 4 + 8 files changed, 311 insertions(+), 3 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 02c2f528cb60..80ab9c9dd43c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1079,6 +1079,15 @@ config ARM64_SSBD If unsure, say Y. +config MITIGATE_SPECTRE_BRANCH_HISTORY + bool "Mitigate Spectre style attacks against branch history" if EXPERT + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. + When taking an exception from user-space, a sequence of branches + or a firmware call overwrites the branch history. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on AARCH32_EL0 diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 3fbe273e84ad..eeac442eb81a 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -705,7 +705,9 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .macro __mitigate_spectre_bhb_loop tmp #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY - mov \tmp, #32 +alternative_cb spectre_bhb_patch_loop_iter + mov \tmp, #32 // Patched to correct the immediate +alternative_cb_end .Lspectre_bhb_loop\@: b . 
+ 4 subs \tmp, \tmp, #1 diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 0833197ea5c1..1a3f528487d2 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -492,6 +492,21 @@ static inline bool cpu_supports_mixed_endian_el0(void) return id_aa64mmfr0_mixed_endian_el0(read_cpuid(ID_AA64MMFR0_EL1)); } +static inline bool supports_csv2p3(int scope) +{ + u64 pfr0; + u8 csv2_val; + + if (scope == SCOPE_LOCAL_CPU) + pfr0 = read_sysreg_s(SYS_ID_AA64PFR0_EL1); + else + pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + csv2_val = cpuid_feature_extract_unsigned_field(pfr0, + ID_AA64PFR0_CSV2_SHIFT); + return csv2_val == 3; +} + static inline bool system_supports_32bit_el0(void) { return cpus_have_const_cap(ARM64_HAS_32BIT_EL0); @@ -557,6 +572,9 @@ enum mitigation_state { }; enum mitigation_state arm64_get_spectre_bhb_state(void); +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, int scope); +u8 spectre_bhb_loop_affected(int scope); +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *__unused); static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) { diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index aa3879612130..852474568a9e 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -81,6 +81,10 @@ #define ARM_CPU_PART_CORTEX_A35 0xD04 #define ARM_CPU_PART_CORTEX_A55 0xD05 #define ARM_CPU_PART_CORTEX_A76 0xD0B +#define ARM_CPU_PART_NEOVERSE_V1 0xD40 +#define ARM_CPU_PART_CORTEX_A78 0xD41 +#define ARM_CPU_PART_CORTEX_X1 0xD44 +#define ARM_CPU_PART_CORTEX_A78C 0xD4B #define APM_CPU_PART_POTENZA 0x000 @@ -115,6 +119,11 @@ #define MIDR_CORTEX_A35 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A35) #define MIDR_CORTEX_A55 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A55) #define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) +#define MIDR_NEOVERSE_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_NEOVERSE_V1) +#define MIDR_CORTEX_A78 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78) +#define MIDR_CORTEX_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_X1) +#define MIDR_CORTEX_A78C MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A78C) + #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index db16c4844de6..cc866be7569d 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -586,6 +586,7 @@ #endif /* id_aa64mmfr1 */ +#define ID_AA64MMFR1_ECBHB_SHIFT 60 #define ID_AA64MMFR1_PAN_SHIFT 20 #define ID_AA64MMFR1_LOR_SHIFT 16 #define ID_AA64MMFR1_HPD_SHIFT 12 diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h index 3f76dfd9e074..f222d8e033b3 100644 --- a/arch/arm64/include/asm/vectors.h +++ b/arch/arm64/include/asm/vectors.h @@ -9,6 +9,7 @@ #include #include +#include extern char vectors[]; extern char tramp_vectors[]; @@ -40,6 +41,11 @@ enum arm64_bp_harden_el1_vectors { EL1_VECTOR_KPTI, }; +#ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY +#define EL1_VECTOR_BHB_LOOP -1 +#define EL1_VECTOR_BHB_FW -1 +#endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ + /* The vectors to use on return from EL0. e.g. 
to remap the kernel */ DECLARE_PER_CPU_READ_MOSTLY(const char *, this_cpu_vector); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 82ad16b59366..37ddd14ff85f 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -24,6 +24,7 @@ #include #include #include +#include #ifdef CONFIG_HISILICON_ERRATUM_HIP08_RU_PREFETCH #include #include @@ -940,6 +941,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = has_ssbd_mitigation, .midr_range_list = arm64_ssb_cpus, }, + { + .desc = "Spectre-BHB", + .capability = ARM64_SPECTRE_BHB, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = is_spectre_bhb_affected, + .cpu_enable = spectre_bhb_enable_mitigation, + }, #ifdef CONFIG_ARM64_ERRATUM_1463225 { .desc = "ARM erratum 1463225", @@ -1020,6 +1028,33 @@ ssize_t cpu_show_spec_store_bypass(struct device *dev, return sprintf(buf, "Vulnerable\n"); } +/* + * We try to ensure that the mitigation state can never change as the result of + * onlining a late CPU. + */ +static void update_mitigation_state(enum mitigation_state *oldp, + enum mitigation_state new) +{ + enum mitigation_state state; + + do { + state = READ_ONCE(*oldp); + if (new <= state) + break; + } while (cmpxchg_relaxed(oldp, state, new) != state); +} + +/* + * Spectre BHB. + * + * A CPU is either: + * - Mitigated by a branchy loop a CPU specific number of times, and listed + * in our "loop mitigated list". + * - Mitigated in software by the firmware Spectre v2 call. + * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no + * software mitigation in the vectors is needed. + * - Has CSV2.3, so is unaffected. + */ static enum mitigation_state spectre_bhb_state; enum mitigation_state arm64_get_spectre_bhb_state(void) @@ -1027,6 +1062,158 @@ enum mitigation_state arm64_get_spectre_bhb_state(void) return spectre_bhb_state; } +/* + * This must be called with SCOPE_LOCAL_CPU for each type of CPU, before any + * SCOPE_SYSTEM call will give the right answer. 
+ */ +u8 spectre_bhb_loop_affected(int scope) +{ + u8 k = 0; + static u8 max_bhb_k; + + if (scope == SCOPE_LOCAL_CPU) { + static const struct midr_range spectre_bhb_k32_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A78C), + MIDR_ALL_VERSIONS(MIDR_CORTEX_X1), + MIDR_ALL_VERSIONS(MIDR_NEOVERSE_V1), + {}, + }; + static const struct midr_range spectre_bhb_k24_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A76), + {}, + }; + static const struct midr_range spectre_bhb_k8_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + {}, + }; + + if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k32_list)) + k = 32; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k24_list)) + k = 24; + else if (is_midr_in_range_list(read_cpuid_id(), spectre_bhb_k8_list)) + k = 8; + + max_bhb_k = max(max_bhb_k, k); + } else { + k = max_bhb_k; + } + + return k; +} + +static enum mitigation_state spectre_bhb_get_cpu_fw_mitigation_state(void) +{ + int ret; + struct arm_smccc_res res; + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + return SPECTRE_VULNERABLE; + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_3, &res); + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_3, &res); + break; + + default: + return SPECTRE_VULNERABLE; + } + + ret = res.a0; + switch (ret) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + default: + case SMCCC_RET_NOT_SUPPORTED: + return SPECTRE_VULNERABLE; + } +} + +static bool is_spectre_bhb_fw_affected(int scope) +{ + static bool system_affected; + enum mitigation_state fw_state; + bool has_smccc = (psci_ops.smccc_version >= SMCCC_VERSION_1_1); + static const struct midr_range spectre_bhb_firmware_mitigated_list[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), + {}, + }; + bool cpu_in_list = is_midr_in_range_list(read_cpuid_id(), + spectre_bhb_firmware_mitigated_list); + + if (scope != SCOPE_LOCAL_CPU) + return system_affected; + + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (cpu_in_list || (has_smccc && fw_state == SPECTRE_MITIGATED)) { + system_affected = true; + return true; + } + + return false; +} + +static bool supports_ecbhb(int scope) +{ + u64 mmfr1; + + if (scope == SCOPE_LOCAL_CPU) + mmfr1 = read_sysreg_s(SYS_ID_AA64MMFR1_EL1); + else + mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_ECBHB_SHIFT); +} + +bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, + int scope) +{ + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (supports_csv2p3(scope)) + return false; + + if (spectre_bhb_loop_affected(scope)) + return true; + + if (is_spectre_bhb_fw_affected(scope)) + return true; + + return false; +} + +static void this_cpu_set_vectors(enum arm64_bp_harden_el1_vectors slot) +{ + const char *v = arm64_get_bp_hardening_vector(slot); + + if (slot < 0) + return; + + __this_cpu_write(this_cpu_vector, v); + + /* + * When KPTI is in use, the vectors are switched when exiting to + * user-space. 
+ */ + if (arm64_kernel_unmapped_at_el0()) + return; + + write_sysreg(v, vbar_el1); + isb(); +} + #ifdef CONFIG_KVM_INDIRECT_VECTORS static const char *kvm_bhb_get_vecs_end(const char *start) { @@ -1042,7 +1229,7 @@ static const char *kvm_bhb_get_vecs_end(const char *start) return NULL; } -void kvm_setup_bhb_slot(const char *hyp_vecs_start) +static void kvm_setup_bhb_slot(const char *hyp_vecs_start) { int cpu, slot = -1; const char *hyp_vecs_end; @@ -1078,5 +1265,77 @@ void kvm_setup_bhb_slot(const char *hyp_vecs_start) #define __spectre_bhb_loop_k24_start NULL #define __spectre_bhb_loop_k32_start NULL -void kvm_setup_bhb_slot(const char *hyp_vecs_start) { }; +static void kvm_setup_bhb_slot(const char *hyp_vecs_start) { }; #endif + +void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) +{ + enum mitigation_state fw_state, state = SPECTRE_VULNERABLE; + + if (!is_spectre_bhb_affected(entry, SCOPE_LOCAL_CPU)) + return; + + if (!__spectrev2_safe && !__hardenbp_enab) { + /* No point mitigating Spectre-BHB alone. */ + } else if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) { + pr_info_once("spectre-bhb mitigation disabled by compile time option\n"); + } else if (cpu_mitigations_off()) { + pr_info_once("spectre-bhb mitigation disabled by command line option\n"); + } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { + state = SPECTRE_MITIGATED; + } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { + switch (spectre_bhb_loop_affected(SCOPE_SYSTEM)) { + case 8: + kvm_setup_bhb_slot(__spectre_bhb_loop_k8_start); + break; + case 24: + kvm_setup_bhb_slot(__spectre_bhb_loop_k24_start); + break; + case 32: + kvm_setup_bhb_slot(__spectre_bhb_loop_k32_start); + break; + default: + WARN_ON_ONCE(1); + } + this_cpu_set_vectors(EL1_VECTOR_BHB_LOOP); + + state = SPECTRE_MITIGATED; + } else if (is_spectre_bhb_fw_affected(SCOPE_LOCAL_CPU)) { + fw_state = spectre_bhb_get_cpu_fw_mitigation_state(); + if (fw_state == SPECTRE_MITIGATED) { + kvm_setup_bhb_slot(__smccc_workaround_3_smc_start); + this_cpu_set_vectors(EL1_VECTOR_BHB_FW); + + /* + * With WA3 in the vectors, the WA1 calls can be + * removed. 
+ */ + __this_cpu_write(bp_hardening_data.fn, NULL); + + state = SPECTRE_MITIGATED; + } + } + + update_mitigation_state(&spectre_bhb_state, state); +} + +/* Patched to correct the immediate */ +void __init spectre_bhb_patch_loop_iter(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, int nr_inst) +{ + u8 rd; + u32 insn; + u16 loop_count = spectre_bhb_loop_affected(SCOPE_SYSTEM); + + BUG_ON(nr_inst != 1); /* MOV -> MOV */ + + if (!IS_ENABLED(CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY)) + return; + + insn = le32_to_cpu(*origptr); + rd = aarch64_insn_decode_register(AARCH64_INSN_REGTYPE_RD, insn); + insn = aarch64_insn_gen_movewide(rd, loop_count, 0, + AARCH64_INSN_VARIANT_64BIT, + AARCH64_INSN_MOVEWIDE_ZERO); + *updptr++ = cpu_to_le32(insn); +} diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index acdf1e51e51c..18f454d772f8 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -111,6 +111,10 @@ el1_hvc_guest: /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ ARM_SMCCC_ARCH_WORKAROUND_2) + cbz w1, wa_epilogue + + eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_2 ^ \ + ARM_SMCCC_ARCH_WORKAROUND_3) cbnz w1, el1_trap #ifdef CONFIG_ARM64_SSBD -- Gitee From 8fa0eebcf9d26f0ffdb802cef0b071a71fb41473 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:47 +0000 Subject: [PATCH 255/340] KVM: arm64: Allow SMCCC_ARCH_WORKAROUND_3 to be discovered and migrated stable inclusion from stable-v4.19.236 commit 5f051d32b03f08a0507ac1afd7b9c0a30c8e5d59 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit a5905d6af492ee6a4a2205f0d550b3f931b03d03 upstream. KVM allows the guest to discover whether the ARCH_WORKAROUND SMCCC are implemented, and to preserve that state during migration through its firmware register interface. Add the necessary boiler plate for SMCCC_ARCH_WORKAROUND_3. Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas [ kvm code moved to virt/kvm/arm, removed fw regs ABI. 
Added 32bit stub ] Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Conflicts: arch/arm/include/asm/kvm_host.h arch/arm64/include/asm/kvm_host.h virt/kvm/arm/psci.c Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/include/asm/kvm_host.h | 7 +++++++ arch/arm64/include/asm/kvm_host.h | 5 +++++ 2 files changed, 12 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 0066de61f4c6..46a2e8636f86 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -405,4 +406,10 @@ static inline int kvm_arm_config_vm(struct kvm *kvm, unsigned long type) return 0; } +static inline int kvm_arm_get_spectre_bhb_state(void) +{ + /* 32bit guests don't need firmware for this */ + return SPECTRE_VULNERABLE; /* aka SMCCC_RET_NOT_SUPPORTED */ +} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 556351524748..bf03056e3751 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -625,4 +625,9 @@ void kvm_arch_free_vm(struct kvm *kvm); int kvm_arm_config_vm(struct kvm *kvm, unsigned long type); +static inline enum mitigation_state kvm_arm_get_spectre_bhb_state(void) +{ + return arm64_get_spectre_bhb_state(); +} + #endif /* __ARM64_KVM_HOST_H__ */ -- Gitee From 09501bea5e1c893c432b20ab318b3c4427054dcd Mon Sep 17 00:00:00 2001 From: Joey Gouly Date: Wed, 20 Apr 2022 11:53:48 +0000 Subject: [PATCH 256/340] arm64: add ID_AA64ISAR2_EL1 sys register stable inclusion from stable-v4.19.236 commit a44e7ddb5822b943cd50c5ad6a2541fb445d58bd category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 9e45365f1469ef2b934f9d035975dbc9ad352116 upstream. This is a new ID register, introduced in 8.7. 
Signed-off-by: Joey Gouly Cc: Will Deacon Cc: Marc Zyngier Cc: James Morse Cc: Alexandru Elisei Cc: Suzuki K Poulose Cc: Reiji Watanabe Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/20211210165432.8106-3-joey.gouly@arm.com Signed-off-by: Catalin Marinas Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/include/asm/cpu.h | 1 + arch/arm64/include/asm/sysreg.h | 1 + arch/arm64/kernel/cpufeature.c | 9 +++++++++ arch/arm64/kernel/cpuinfo.c | 1 + arch/arm64/kvm/sys_regs.c | 2 +- 5 files changed, 13 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index 88392272250e..3a9908a01219 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -36,6 +36,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64dfr1; u64 reg_id_aa64isar0; u64 reg_id_aa64isar1; + u64 reg_id_aa64isar2; u64 reg_id_aa64mmfr0; u64 reg_id_aa64mmfr1; u64 reg_id_aa64mmfr2; diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cc866be7569d..cc0e11463b0b 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -159,6 +159,7 @@ #define SYS_ID_AA64ISAR0_EL1 sys_reg(3, 0, 0, 6, 0) #define SYS_ID_AA64ISAR1_EL1 sys_reg(3, 0, 0, 6, 1) +#define SYS_ID_AA64ISAR2_EL1 sys_reg(3, 0, 0, 6, 2) #define SYS_ID_AA64MMFR0_EL1 sys_reg(3, 0, 0, 7, 0) #define SYS_ID_AA64MMFR1_EL1 sys_reg(3, 0, 0, 7, 1) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f25f0dfed813..530c049770b2 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -153,6 +153,10 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), @@ -402,6 +406,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 6 */ ARM64_FTR_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0), ARM64_FTR_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1), + ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -550,6 +555,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_AA64DFR1_EL1, info->reg_id_aa64dfr1); init_cpu_ftr_reg(SYS_ID_AA64ISAR0_EL1, info->reg_id_aa64isar0); init_cpu_ftr_reg(SYS_ID_AA64ISAR1_EL1, info->reg_id_aa64isar1); + init_cpu_ftr_reg(SYS_ID_AA64ISAR2_EL1, info->reg_id_aa64isar2); init_cpu_ftr_reg(SYS_ID_AA64MMFR0_EL1, info->reg_id_aa64mmfr0); init_cpu_ftr_reg(SYS_ID_AA64MMFR1_EL1, info->reg_id_aa64mmfr1); init_cpu_ftr_reg(SYS_ID_AA64MMFR2_EL1, info->reg_id_aa64mmfr2); @@ -667,6 +673,8 @@ void update_cpu_features(int cpu, info->reg_id_aa64isar0, boot->reg_id_aa64isar0); taint |= check_update_ftr_reg(SYS_ID_AA64ISAR1_EL1, cpu, info->reg_id_aa64isar1, boot->reg_id_aa64isar1); + taint |= check_update_ftr_reg(SYS_ID_AA64ISAR2_EL1, cpu, + info->reg_id_aa64isar2, boot->reg_id_aa64isar2); /* * Differing PARange support is fine as long as all peripherals and @@ -816,6 +824,7 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) read_sysreg_case(SYS_ID_AA64MMFR2_EL1); read_sysreg_case(SYS_ID_AA64ISAR0_EL1); 
read_sysreg_case(SYS_ID_AA64ISAR1_EL1); + read_sysreg_case(SYS_ID_AA64ISAR2_EL1); read_sysreg_case(SYS_CNTFRQ_EL0); read_sysreg_case(SYS_CTR_EL0); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index b79589ab3070..005d88db1082 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -342,6 +342,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_aa64dfr1 = read_cpuid(ID_AA64DFR1_EL1); info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); + info->reg_id_aa64isar2 = read_cpuid(ID_AA64ISAR2_EL1); info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); info->reg_id_aa64mmfr2 = read_cpuid(ID_AA64MMFR2_EL1); diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 29496548c56b..25caa9ec95e4 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1325,7 +1325,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { /* CRm=6 */ ID_SANITISED(ID_AA64ISAR0_EL1), ID_SANITISED(ID_AA64ISAR1_EL1), - ID_UNALLOCATED(6,2), + ID_SANITISED(ID_AA64ISAR2_EL1), ID_UNALLOCATED(6,3), ID_UNALLOCATED(6,4), ID_UNALLOCATED(6,5), -- Gitee From d275c04a841934b8305bbf13931deb07b1bc76f6 Mon Sep 17 00:00:00 2001 From: James Morse Date: Wed, 20 Apr 2022 11:53:49 +0000 Subject: [PATCH 257/340] arm64: Use the clearbhb instruction in mitigations stable inclusion from stable-v4.19.236 commit ed5dec3fae86f20db52930e1d9a7cc38403994cc category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 228a26b912287934789023b4132ba76065d9491c upstream. Future CPUs may implement a clearbhb instruction that is sufficient to mitigate SpectreBHB. CPUs that implement this instruction, but not CSV2.3 must be affected by Spectre-BHB. Add support to use this instruction as the BHB mitigation on CPUs that support it. The instruction is in the hint space, so it will be treated by a NOP as older CPUs. Reviewed-by: Russell King (Oracle) Reviewed-by: Catalin Marinas [ modified for stable: Use a KVM vector template instead of alternatives ] Signed-off-by: James Morse Signed-off-by: Greg Kroah-Hartman Signed-off-by: Chen Jiahao Reviewed-by: Hanjun Guo Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm64/include/asm/assembler.h | 7 +++++++ arch/arm64/include/asm/cpufeature.h | 13 +++++++++++++ arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/include/asm/vectors.h | 7 +++++++ arch/arm64/kernel/cpu_errata.c | 14 ++++++++++++++ arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/entry.S | 8 ++++++++ arch/arm64/kvm/hyp/hyp-entry.S | 6 ++++++ 8 files changed, 59 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index eeac442eb81a..214319505313 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -118,6 +118,13 @@ hint #20 .endm +/* + * Clear Branch History instruction + */ + .macro clearbhb + hint #22 + .endm + /* * Sanitise a 64-bit bounded index wrt speculation, returning zero if out * of bounds. 
diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 1a3f528487d2..bfb699957880 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -507,6 +507,19 @@ static inline bool supports_csv2p3(int scope) return csv2_val == 3; } +static inline bool supports_clearbhb(int scope) +{ + u64 isar2; + + if (scope == SCOPE_LOCAL_CPU) + isar2 = read_sysreg_s(SYS_ID_AA64ISAR2_EL1); + else + isar2 = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); + + return cpuid_feature_extract_unsigned_field(isar2, + ID_AA64ISAR2_CLEARBHB_SHIFT); +} + static inline bool system_supports_32bit_el0(void) { return cpus_have_const_cap(ARM64_HAS_32BIT_EL0); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index cc0e11463b0b..0fd51d253648 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -528,6 +528,9 @@ #define ID_AA64ISAR1_JSCVT_SHIFT 12 #define ID_AA64ISAR1_DPB_SHIFT 0 +/* id_aa64isar2 */ +#define ID_AA64ISAR2_CLEARBHB_SHIFT 28 + /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 diff --git a/arch/arm64/include/asm/vectors.h b/arch/arm64/include/asm/vectors.h index f222d8e033b3..695583b9a145 100644 --- a/arch/arm64/include/asm/vectors.h +++ b/arch/arm64/include/asm/vectors.h @@ -33,6 +33,12 @@ enum arm64_bp_harden_el1_vectors { * canonical vectors. */ EL1_VECTOR_BHB_FW, + + /* + * Use the ClearBHB instruction, before branching to the canonical + * vectors. + */ + EL1_VECTOR_BHB_CLEAR_INSN, #endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ /* @@ -44,6 +50,7 @@ enum arm64_bp_harden_el1_vectors { #ifndef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY #define EL1_VECTOR_BHB_LOOP -1 #define EL1_VECTOR_BHB_FW -1 +#define EL1_VECTOR_BHB_CLEAR_INSN -1 #endif /* !CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ /* The vectors to use on return from EL0. e.g. to remap the kernel */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 37ddd14ff85f..fdff9c6d8969 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -156,6 +156,8 @@ extern char __spectre_bhb_loop_k24_start[]; extern char __spectre_bhb_loop_k24_end[]; extern char __spectre_bhb_loop_k32_start[]; extern char __spectre_bhb_loop_k32_end[]; +extern char __spectre_bhb_clearbhb_start[]; +extern char __spectre_bhb_clearbhb_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -1051,6 +1053,7 @@ static void update_mitigation_state(enum mitigation_state *oldp, * - Mitigated by a branchy loop a CPU specific number of times, and listed * in our "loop mitigated list". * - Mitigated in software by the firmware Spectre v2 call. + * - Has the ClearBHB instruction to perform the mitigation. * - Has the 'Exception Clears Branch History Buffer' (ECBHB) feature, so no * software mitigation in the vectors is needed. * - Has CSV2.3, so is unaffected. 
@@ -1185,6 +1188,9 @@ bool is_spectre_bhb_affected(const struct arm64_cpu_capabilities *entry, if (supports_csv2p3(scope)) return false; + if (supports_clearbhb(scope)) + return true; + if (spectre_bhb_loop_affected(scope)) return true; @@ -1225,6 +1231,8 @@ static const char *kvm_bhb_get_vecs_end(const char *start) return __spectre_bhb_loop_k24_end; else if (start == __spectre_bhb_loop_k32_start) return __spectre_bhb_loop_k32_end; + else if (start == __spectre_bhb_clearbhb_start) + return __spectre_bhb_clearbhb_end; return NULL; } @@ -1264,6 +1272,7 @@ static void kvm_setup_bhb_slot(const char *hyp_vecs_start) #define __spectre_bhb_loop_k8_start NULL #define __spectre_bhb_loop_k24_start NULL #define __spectre_bhb_loop_k32_start NULL +#define __spectre_bhb_clearbhb_start NULL static void kvm_setup_bhb_slot(const char *hyp_vecs_start) { }; #endif @@ -1282,6 +1291,11 @@ void spectre_bhb_enable_mitigation(const struct arm64_cpu_capabilities *entry) } else if (cpu_mitigations_off()) { pr_info_once("spectre-bhb mitigation disabled by command line option\n"); } else if (supports_ecbhb(SCOPE_LOCAL_CPU)) { + state = SPECTRE_MITIGATED; + } else if (supports_clearbhb(SCOPE_LOCAL_CPU)) { + kvm_setup_bhb_slot(__spectre_bhb_clearbhb_start); + this_cpu_set_vectors(EL1_VECTOR_BHB_CLEAR_INSN); + state = SPECTRE_MITIGATED; } else if (spectre_bhb_loop_affected(SCOPE_LOCAL_CPU)) { switch (spectre_bhb_loop_affected(SCOPE_SYSTEM)) { diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 530c049770b2..aa9178e9acc6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -154,6 +154,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { }; static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), ARM64_FTR_END, }; diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 65f66020985a..7c231eb211a4 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -1081,6 +1081,7 @@ alternative_else_nop_endif #define BHB_MITIGATION_NONE 0 #define BHB_MITIGATION_LOOP 1 #define BHB_MITIGATION_FW 2 +#define BHB_MITIGATION_INSN 3 .macro tramp_ventry, vector_start, regsize, kpti, bhb .align 7 @@ -1097,6 +1098,11 @@ alternative_else_nop_endif __mitigate_spectre_bhb_loop x30 .endif // \bhb == BHB_MITIGATION_LOOP + .if \bhb == BHB_MITIGATION_INSN + clearbhb + isb + .endif // \bhb == BHB_MITIGATION_INSN + .if \kpti == 1 /* * Defend against branch aliasing attacks by pushing a dummy @@ -1173,6 +1179,7 @@ ENTRY(tramp_vectors) #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_LOOP generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_FW + generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_INSN #endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ generate_tramp_vector kpti=1, bhb=BHB_MITIGATION_NONE END(tramp_vectors) @@ -1235,6 +1242,7 @@ ENTRY(__bp_harden_el1_vectors) #ifdef CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY generate_el1_vector bhb=BHB_MITIGATION_LOOP generate_el1_vector bhb=BHB_MITIGATION_FW + generate_el1_vector bhb=BHB_MITIGATION_INSN #endif /* CONFIG_MITIGATE_SPECTRE_BRANCH_HISTORY */ END(__bp_harden_el1_vectors) .popsection diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 18f454d772f8..454012f16c00 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -387,4 +387,10 @@ ENTRY(__spectre_bhb_loop_k32_start) ldp x0, x1, [sp, #(8 * 0)] add sp, 
sp, #(8 * 2) ENTRY(__spectre_bhb_loop_k32_end) + +ENTRY(__spectre_bhb_clearbhb_start) + esb + clearbhb + isb +ENTRY(__spectre_bhb_clearbhb_end) #endif -- Gitee From b453c348528392ae14f37d2b35ee63bea5cb9b0b Mon Sep 17 00:00:00 2001 From: Haimin Zhang Date: Thu, 21 Apr 2022 02:40:42 +0000 Subject: [PATCH 258/340] af_key: add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register stable inclusion from stable-v4.19.238 commit 693fe8af9a2625139de07bd1ae212a7d89c37795 category: bugfix bugzilla: 186606, https://gitee.com/src-openeuler/kernel/issues/I53SSV CVE: CVE-2022-1353 -------------------------------- [ Upstream commit 9a564bccb78a76740ea9d75a259942df8143d02c ] Add __GFP_ZERO flag for compose_sadb_supported in function pfkey_register to initialize the buffer of supp_skb to fix a kernel-info-leak issue. 1) Function pfkey_register calls compose_sadb_supported to request a sk_buff. 2) compose_sadb_supported calls alloc_sbk to allocate a sk_buff, but it doesn't zero it. 3) If auth_len is greater 0, then compose_sadb_supported treats the memory as a struct sadb_supported and begins to initialize. But it just initializes the field sadb_supported_len and field sadb_supported_exttype without field sadb_supported_reserved. Reported-by: TCS Robot Signed-off-by: Haimin Zhang Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Xu Jia Reviewed-by: Wei Yongjun Reviewed-by: Wang Weiyang Signed-off-by: Yongqiang Liu --- net/key/af_key.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/key/af_key.c b/net/key/af_key.c index c7d5a6015389..8ce513e08fd7 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1709,7 +1709,7 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad xfrm_probe_algs(); - supp_skb = compose_sadb_supported(hdr, GFP_KERNEL); + supp_skb = compose_sadb_supported(hdr, GFP_KERNEL | __GFP_ZERO); if (!supp_skb) { if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) pfk->registered &= ~(1<sadb_msg_satype); -- Gitee From 7348b7ecb179fb5e2008a02932f7be4ba9117a03 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 22 Apr 2022 08:01:44 +0000 Subject: [PATCH 259/340] x86/entry/64: Fix unwind hints in kernel exit path stable inclusion from stable-v4.19.238 commit ac069a960d357a7495fd2a1cde28d9b03250cace category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I53TS3 CVE: NA -------------------------------- commit 1fb143634a38095b641a3a21220774799772dc4c upstream. In swapgs_restore_regs_and_return_to_usermode, after the stack is switched to the trampoline stack, the existing UNWIND_HINT_REGS hint is no longer valid, which can result in the following ORC unwinder warning: WARNING: can't dereference registers at 000000003aeb0cdd for ip swapgs_restore_regs_and_return_to_usermode+0x93/0xa0 For full correctness, we could try to add complicated unwind hints so the unwinder could continue to find the registers, but when when it's this close to kernel exit, unwind hints aren't really needed anymore and it's fine to just use an empty hint which tells the unwinder to stop. For consistency, also move the UNWIND_HINT_EMPTY in entry_SYSCALL_64_after_hwframe to a similar location. Fixes: 3e3b9293d392 ("x86/entry/64: Return to userspace from the trampoline stack") Reported-by: Vince Weaver Reported-by: Dave Jones Reported-by: Dr. 
David Alan Gilbert Reported-by: Joe Mario Reported-by: Jann Horn Reported-by: Linus Torvalds Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lore.kernel.org/r/60ea8f562987ed2d9ace2977502fe481c0d7c9a0.1587808742.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Zou Yipeng Reviewed-by: Zhang Jianhua Signed-off-by: Yongqiang Liu --- arch/x86/entry/entry_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index ccb5e3486aee..ca651113bea2 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -312,7 +312,6 @@ GLOBAL(entry_SYSCALL_64_after_hwframe) */ syscall_return_via_sysret: /* rcx and r11 are already restored (see code above) */ - UNWIND_HINT_EMPTY POP_REGS pop_rdi=0 skip_r11rcx=1 /* @@ -321,6 +320,7 @@ syscall_return_via_sysret: */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY pushq RSP-RDI(%rdi) /* RSP */ pushq (%rdi) /* RDI */ @@ -700,6 +700,7 @@ GLOBAL(swapgs_restore_regs_and_return_to_usermode) */ movq %rsp, %rdi movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp + UNWIND_HINT_EMPTY /* Copy the IRET frame to the trampoline stack. */ pushq 6*8(%rdi) /* SS */ -- Gitee From 5b5dc732dc8a8b7a4ee08f6f6ab17fdcba624599 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Fri, 22 Apr 2022 08:01:45 +0000 Subject: [PATCH 260/340] objtool: Fix stack offset tracking for indirect CFAs stable inclusion from stable-v4.19.238 commit 9fbfc77d0f5e04a7434fa6d32112036e2a41bc6e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I53TS3 CVE: NA -------------------------------- commit d8dd25a461e4eec7190cb9d66616aceacc5110ad upstream. When the current frame address (CFA) is stored on the stack (i.e., cfa->base == CFI_SP_INDIRECT), objtool neglects to adjust the stack offset when there are subsequent pushes or pops. This results in bad ORC data at the end of the ENTER_IRQ_STACK macro, when it puts the previous stack pointer on the stack and does a subsequent push. 
This fixes the following unwinder warning: WARNING: can't dereference registers at 00000000f0a6bdba for ip interrupt_entry+0x9f/0xa0 Fixes: 627fce14809b ("objtool: Add ORC unwind table generation") Reported-by: Vince Weaver Reported-by: Dave Jones Reported-by: Steven Rostedt Reported-by: Vegard Nossum Reported-by: Joe Mario Reviewed-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Signed-off-by: Ingo Molnar Cc: Andy Lutomirski Cc: Jann Horn Cc: Peter Zijlstra Cc: Thomas Gleixner Link: https://lore.kernel.org/r/853d5d691b29e250333332f09b8e27410b2d9924.1587808742.git.jpoimboe@redhat.com Signed-off-by: Greg Kroah-Hartman Signed-off-by: Zou Yipeng Reviewed-by: Zhang Jianhua Signed-off-by: Yongqiang Liu --- tools/objtool/check.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/objtool/check.c b/tools/objtool/check.c index ecf5fc77f50b..952273d7afd9 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -1318,7 +1318,7 @@ static int update_insn_state_regs(struct instruction *insn, struct insn_state *s struct cfi_reg *cfa = &state->cfa; struct stack_op *op = &insn->stack_op; - if (cfa->base != CFI_SP) + if (cfa->base != CFI_SP && cfa->base != CFI_SP_INDIRECT) return 0; /* push */ -- Gitee From 5de027286b589c83daad07aed84c912d551606c5 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 22 Apr 2022 13:01:21 +0000 Subject: [PATCH 261/340] ax25: NPD bug when detaching AX25 device stable inclusion from linux-4.19.235 commit bd05a8f1b7368ef4f7845548312fc61ab4fa63ce category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I53VJO CVE: CVE-2022-1199 -------------------------------- commit 1ade48d0c27d5da1ccf4b583d8c5fc8b534a3ac8 upstream. The existing cleanup routine implementation is not well synchronized with the syscall routine. When a device is detaching, below race could occur. static int ax25_sendmsg(...) { ... lock_sock() ax25 = sk_to_ax25(sk); if (ax25->ax25_dev == NULL) // CHECK ... ax25_queue_xmit(skb, ax25->ax25_dev->dev); // USE ... } static void ax25_kill_by_device(...) { ... if (s->ax25_dev == ax25_dev) { s->ax25_dev = NULL; ... } Other syscall functions like ax25_getsockopt, ax25_getname, ax25_info_show also suffer from similar races. To fix them, this patch introduce lock_sock() into ax25_kill_by_device in order to guarantee that the nullify action in cleanup routine cannot proceed when another socket request is pending. Signed-off-by: Hanjie Wu Signed-off-by: Lin Ma Signed-off-by: David S. 
Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Zhengchao Shao Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/ax25/af_ax25.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 44ec492f3dc2..5d05d8dca168 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -88,8 +88,10 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { - s->ax25_dev = NULL; spin_unlock_bh(&ax25_list_lock); + lock_sock(s->sk); + s->ax25_dev = NULL; + release_sock(s->sk); ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); -- Gitee From afbc39a55021ee84bb1dec7c07fc98a36d58e9f7 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 22 Apr 2022 13:01:22 +0000 Subject: [PATCH 262/340] ax25: improve the incomplete fix to avoid UAF and NPD bugs stable inclusion from linux-4.19.235 commit 3072e72814de56f3c674650a8af98233ddf78b19 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I53VJO CVE: CVE-2022-1199 -------------------------------- [ Upstream commit 4e0f718daf97d47cf7dec122da1be970f145c809 ] The previous commit 1ade48d0c27d ("ax25: NPD bug when detaching AX25 device") introduce lock_sock() into ax25_kill_by_device to prevent NPD bug. But the concurrency NPD or UAF bug will occur, when lock_sock() or release_sock() dereferences the ax25_cb->sock. The NULL pointer dereference bug can be shown as below: ax25_kill_by_device() | ax25_release() | ax25_destroy_socket() | ax25_cb_del() ... | ... | ax25->sk=NULL; lock_sock(s->sk); //(1) | s->ax25_dev = NULL; | ... release_sock(s->sk); //(2) | ... | The root cause is that the sock is set to null before dereference site (1) or (2). Therefore, this patch extracts the ax25_cb->sock in advance, and uses ax25_list_lock to protect it, which can synchronize with ax25_cb_del() and ensure the value of sock is not null before dereference sites. The concurrency UAF bug can be shown as below: ax25_kill_by_device() | ax25_release() | ax25_destroy_socket() ... | ... | sock_put(sk); //FREE lock_sock(s->sk); //(1) | s->ax25_dev = NULL; | ... release_sock(s->sk); //(2) | ... | The root cause is that the sock is released before dereference site (1) or (2). Therefore, this patch uses sock_hold() to increase the refcount of sock and uses ax25_list_lock to protect it, which can synchronize with ax25_cb_del() in ax25_destroy_socket() and ensure the sock wil not be released before dereference sites. Signed-off-by: Duoming Zhou Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: Zhengchao Shao Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/ax25/af_ax25.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 5d05d8dca168..dccfd9c2086f 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -80,6 +80,7 @@ static void ax25_kill_by_device(struct net_device *dev) { ax25_dev *ax25_dev; ax25_cb *s; + struct sock *sk; if ((ax25_dev = ax25_dev_ax25dev(dev)) == NULL) return; @@ -88,13 +89,15 @@ static void ax25_kill_by_device(struct net_device *dev) again: ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { + sk = s->sk; + sock_hold(sk); spin_unlock_bh(&ax25_list_lock); - lock_sock(s->sk); + lock_sock(sk); s->ax25_dev = NULL; - release_sock(s->sk); + release_sock(sk); ax25_disconnect(s, ENETUNREACH); spin_lock_bh(&ax25_list_lock); - + sock_put(sk); /* The entry could have been deleted from the * list meanwhile and thus the next pointer is * no longer valid. Play it safe and restart -- Gitee From 1cb76aa23d195bf9802e8d01891abd16a20c5cf1 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 22 Apr 2022 13:01:23 +0000 Subject: [PATCH 263/340] ax25: Fix NULL pointer dereference in ax25_kill_by_device stable inclusion from linux-4.19.235 commit 5ab8de9377edde3eaf1de9872e2f01d43157cd6c category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I53VJO CVE: CVE-2022-1199 -------------------------------- [ Upstream commit 71171ac8eb34ce7fe6b3267dce27c313ab3cb3ac ] When two ax25 devices attempted to establish connection, the requester use ax25_create(), ax25_bind() and ax25_connect() to initiate connection. The receiver use ax25_rcv() to accept connection and use ax25_create_cb() in ax25_rcv() to create ax25_cb, but the ax25_cb->sk is NULL. When the receiver is detaching, a NULL pointer dereference bug caused by sock_hold(sk) in ax25_kill_by_device() will happen. The corresponding fail log is shown below: =============================================================== BUG: KASAN: null-ptr-deref in ax25_device_event+0xfd/0x290 Call Trace: ... ax25_device_event+0xfd/0x290 raw_notifier_call_chain+0x5e/0x70 dev_close_many+0x174/0x220 unregister_netdevice_many+0x1f7/0xa60 unregister_netdevice_queue+0x12f/0x170 unregister_netdev+0x13/0x20 mkiss_close+0xcd/0x140 tty_ldisc_release+0xc0/0x220 tty_release_struct+0x17/0xa0 tty_release+0x62d/0x670 ... This patch add condition check in ax25_kill_by_device(). If s->sk is NULL, it will goto if branch to kill device. Fixes: 4e0f718daf97 ("ax25: improve the incomplete fix to avoid UAF and NPD bugs") Reported-by: Thomas Osterried Signed-off-by: Duoming Zhou Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: Zhengchao Shao Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/ax25/af_ax25.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index dccfd9c2086f..37a333d0ee03 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -90,6 +90,13 @@ static void ax25_kill_by_device(struct net_device *dev) ax25_for_each(s, &ax25_list) { if (s->ax25_dev == ax25_dev) { sk = s->sk; + if (!sk) { + spin_unlock_bh(&ax25_list_lock); + s->ax25_dev = NULL; + ax25_disconnect(s, ENETUNREACH); + spin_lock_bh(&ax25_list_lock); + goto again; + } sock_hold(sk); spin_unlock_bh(&ax25_list_lock); lock_sock(sk); -- Gitee From b65eebc6e33ad2a8ee77a59b8931c407d41be9e2 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 22 Apr 2022 13:01:24 +0000 Subject: [PATCH 264/340] ax25: fix NPD bug in ax25_disconnect mainline inclusion from mainline-v5.17-rc4 commit 7ec02f5ac8a5be5a3f20611731243dc5e1d9ba10 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I53VJO CVE: CVE-2022-1199 -------------------------------- The ax25_disconnect() in ax25_kill_by_device() is not protected by any locks, thus there is a race condition between ax25_disconnect() and ax25_destroy_socket(). when ax25->sk is assigned as NULL by ax25_destroy_socket(), a NULL pointer dereference bug will occur if site (1) or (2) dereferences ax25->sk. ax25_kill_by_device() | ax25_release() ax25_disconnect() | ax25_destroy_socket() ... | if(ax25->sk != NULL) | ... ... | ax25->sk = NULL; bh_lock_sock(ax25->sk); //(1) | ... ... | bh_unlock_sock(ax25->sk); //(2)| This patch moves ax25_disconnect() into lock_sock(), which can synchronize with ax25_destroy_socket() in ax25_release(). Fail log: =============================================================== BUG: kernel NULL pointer dereference, address: 0000000000000088 ... RIP: 0010:_raw_spin_lock+0x7e/0xd0 ... Call Trace: ax25_disconnect+0xf6/0x220 ax25_device_event+0x187/0x250 raw_notifier_call_chain+0x5e/0x70 dev_close_many+0x17d/0x230 rollback_registered_many+0x1f1/0x950 unregister_netdevice_queue+0x133/0x200 unregister_netdev+0x13/0x20 ... Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller Conflict: net/ax25/af_ax25.c Signed-off-by: Zhengchao Shao Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/ax25/af_ax25.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 37a333d0ee03..3a5812c1a098 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -101,8 +101,8 @@ static void ax25_kill_by_device(struct net_device *dev) spin_unlock_bh(&ax25_list_lock); lock_sock(sk); s->ax25_dev = NULL; - release_sock(sk); ax25_disconnect(s, ENETUNREACH); + release_sock(sk); spin_lock_bh(&ax25_list_lock); sock_put(sk); /* The entry could have been deleted from the -- Gitee From 98802b106005c318a9a364f943ca6bd1b1ef07ed Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 22 Apr 2022 13:01:25 +0000 Subject: [PATCH 265/340] ax25: Fix NULL pointer dereferences in ax25 timers mainline inclusion from mainline-v5.18-rc1 commit fc6d01ff9ef03b66d4a3a23b46fc3c3d8cf92009 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I53VJO CVE: CVE-2022-1205 -------------------------------- The previous commit 7ec02f5ac8a5 ("ax25: fix NPD bug in ax25_disconnect") move ax25_disconnect into lock_sock() in order to prevent NPD bugs. 
But there are race conditions that may lead to null pointer dereferences in ax25_heartbeat_expiry(), ax25_t1timer_expiry(), ax25_t2timer_expiry(), ax25_t3timer_expiry() and ax25_idletimer_expiry(), when we use ax25_kill_by_device() to detach the ax25 device. One of the race conditions that cause null pointer dereferences can be shown as below: (Thread 1) | (Thread 2) ax25_connect() | ax25_std_establish_data_link() | ax25_start_t1timer() | mod_timer(&ax25->t1timer,..) | | ax25_kill_by_device() (wait a time) | ... | s->ax25_dev = NULL; //(1) ax25_t1timer_expiry() | ax25->ax25_dev->values[..] //(2)| ... ... | We set null to ax25_cb->ax25_dev in position (1) and dereference the null pointer in position (2). The corresponding fail log is shown below: =============================================================== BUG: kernel NULL pointer dereference, address: 0000000000000050 CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.17.0-rc6-00794-g45690b7d0 RIP: 0010:ax25_t1timer_expiry+0x12/0x40 ... Call Trace: call_timer_fn+0x21/0x120 __run_timers.part.0+0x1ca/0x250 run_timer_softirq+0x2c/0x60 __do_softirq+0xef/0x2f3 irq_exit_rcu+0xb6/0x100 sysvec_apic_timer_interrupt+0xa2/0xd0 ... This patch moves ax25_disconnect() before s->ax25_dev = NULL and uses del_timer_sync() to delete timers in ax25_disconnect(). If ax25_disconnect() is called by ax25_kill_by_device() or ax25->ax25_dev is NULL, the reason in ax25_disconnect() will be equal to ENETUNREACH, it will wait all timers to stop before we set null to s->ax25_dev in ax25_kill_by_device(). Fixes: 7ec02f5ac8a5 ("ax25: fix NPD bug in ax25_disconnect") Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller Conflict: net/ax25/af_ax25.c Signed-off-by: Zhengchao Shao Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/ax25/af_ax25.c | 4 ++-- net/ax25/ax25_subr.c | 20 ++++++++++++++------ 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 3a5812c1a098..2b0bee2dbbb7 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -92,16 +92,16 @@ static void ax25_kill_by_device(struct net_device *dev) sk = s->sk; if (!sk) { spin_unlock_bh(&ax25_list_lock); - s->ax25_dev = NULL; ax25_disconnect(s, ENETUNREACH); + s->ax25_dev = NULL; spin_lock_bh(&ax25_list_lock); goto again; } sock_hold(sk); spin_unlock_bh(&ax25_list_lock); lock_sock(sk); - s->ax25_dev = NULL; ax25_disconnect(s, ENETUNREACH); + s->ax25_dev = NULL; release_sock(sk); spin_lock_bh(&ax25_list_lock); sock_put(sk); diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 038b109b2be7..c129865cad9f 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,12 +264,20 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) - ax25_stop_heartbeat(ax25); - ax25_stop_t1timer(ax25); - ax25_stop_t2timer(ax25); - ax25_stop_t3timer(ax25); - ax25_stop_idletimer(ax25); + if (reason == ENETUNREACH) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); + } else { + if (!ax25->sk || !sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); + ax25_stop_t1timer(ax25); + ax25_stop_t2timer(ax25); + ax25_stop_t3timer(ax25); + ax25_stop_idletimer(ax25); + } ax25->state = AX25_STATE_0; -- Gitee From 0eba1f6eb46b33dc5a220cd812b4f951f1ca4360 Mon Sep 17 00:00:00 2001 From: Yang Jihong Date: Sun, 24 Apr 2022 02:21:32 +0000 
Subject: [PATCH 266/340] Revert "perf: Paper over the hw.target problems" hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I53VHE CVE: NA -------------------------------- This reverts commit 0380474221530db9147a001034794a95fb4c46c1. This patch was used to solve a race between close() and fork() in perf. However, this patch was not accepted by the community. As a result, the destroy interface is incorrectly invoked during perf_remove_from_context(), causing a UAF, see https://lkml.org/lkml/2019/6/28/856. For the 4.19 kernel, the final fix patch has been incorporated, see eb41044bbece4. Therefore, we need to revert the patch. Signed-off-by: Yang Jihong Reviewed-by: Kuohai Xu Signed-off-by: Yongqiang Liu --- kernel/events/core.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index deba52307349..8dc07a529e6d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2135,28 +2135,6 @@ static void perf_remove_from_context(struct perf_event *event, unsigned long fla event_function_call(event, __perf_remove_from_context, (void *)flags); - /* - * This is as passable as any hw.target handling out there; - * hw.target implies task context, therefore, no migration. - * Which together with DETACH_GROUP means that this is the - * final remove_from_context of a task event. - */ - if (event->hw.target && (flags & DETACH_GROUP)) { - /* - * Now, the problem with, say uprobes, is that they - * use hw.target for context in their ->destroy() - * callbacks. Supposedly, they may need to poke at - * its contents, so better call it while we still - * have the task. - */ - if (event->destroy) { - event->destroy(event); - event->destroy = NULL; - } - put_task_struct(event->hw.target); - event->hw.target = NULL; - } - /* * The above event_function_call() can NO-OP when it hits * TASK_TOMBSTONE. In that case we must already have been detached -- Gitee From b1ee3b19cbd9901aa3965f5eb85a52fe2cb59cf1 Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Tue, 26 Apr 2022 11:31:55 +0000 Subject: [PATCH 267/340] can: usb_8dev: usb_8dev_start_xmit(): fix double dev_kfree_skb() in error path mainline inclusion from mainline-v5.18-rc1 commit 3d3925ff6433f98992685a9679613a2cc97f3ce2 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I51YBQ CVE: CVE-2022-28388 ------------------------------------------------- There is no need to call dev_kfree_skb() when usb_submit_urb() fails because can_put_echo_skb() deletes the original skb and can_free_echo_skb() deletes the cloned skb.
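A minimal sketch of the ownership rule this relies on (illustrative only, simplified from the driver's TX path; URB setup and the busy-queue handling are omitted):

	can_put_echo_skb(skb, netdev, context->echo_index); /* skb is consumed here */
	err = usb_submit_urb(urb, GFP_ATOMIC);
	if (unlikely(err)) {
		/* undo only the echo slot; the original skb is no longer ours */
		can_free_echo_skb(netdev, context->echo_index);
		/* a dev_kfree_skb(skb) here would free the same buffer twice */
	}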
Fixes: 0024d8ad1639 ("can: usb_8dev: Add support for USB2CAN interface from 8 devices") Link: https://lore.kernel.org/all/20220311080614.45229-1-hbh25y@gmail.com Cc: stable@vger.kernel.org Signed-off-by: Hangyu Hua Signed-off-by: Marc Kleine-Budde Conflicts: drivers/net/can/usb/usb_8dev.c Signed-off-by: Ziyang Xuan Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/net/can/usb/usb_8dev.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index 3e4416473607..9cd67262a5ed 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -680,9 +680,20 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, atomic_inc(&priv->active_tx_urbs); err = usb_submit_urb(urb, GFP_ATOMIC); - if (unlikely(err)) - goto failed; - else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) + if (unlikely(err)) { + can_free_echo_skb(netdev, context->echo_index); + + usb_unanchor_urb(urb); + usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); + + atomic_dec(&priv->active_tx_urbs); + + if (err == -ENODEV) + netif_device_detach(netdev); + else + netdev_warn(netdev, "failed tx_urb %d\n", err); + stats->tx_dropped++; + } else if (atomic_read(&priv->active_tx_urbs) >= MAX_TX_URBS) /* Slow down tx path */ netif_stop_queue(netdev); @@ -701,19 +712,6 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; -failed: - can_free_echo_skb(netdev, context->echo_index); - - usb_unanchor_urb(urb); - usb_free_coherent(priv->udev, size, buf, urb->transfer_dma); - - atomic_dec(&priv->active_tx_urbs); - - if (err == -ENODEV) - netif_device_detach(netdev); - else - netdev_warn(netdev, "failed tx_urb %d\n", err); - nomembuf: usb_free_urb(urb); -- Gitee From 1f3f6c4947da96df9b723821a82634cdc9100d18 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 28 Apr 2022 08:12:54 +0000 Subject: [PATCH 268/340] ARM: report Spectre v2 status through sysfs stable inclusion from stable-v4.19.234 commit dc64af755099d1e51fd64e99fe3a59b75595814a category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 9dd78194a3722fa6712192cdd4f7032d45112a9a upstream. As per other architectures, add support for reporting the Spectre vulnerability status via sysfs CPU. 
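A hedged usage note (not part of the patch; the exact string depends on the CPU and on which workaround gets installed): with CPU_SPECTRE now selecting GENERIC_CPU_VULNERABILITIES, the state is expected to be readable as, for example:

	# cat /sys/devices/system/cpu/vulnerabilities/spectre_v2
	Mitigation: Branch predictor hardening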
Acked-by: Catalin Marinas Signed-off-by: Russell King (Oracle) [ preserve res variable and add SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED - gregkh ] Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/include/asm/spectre.h | 28 +++++++ arch/arm/kernel/Makefile | 2 + arch/arm/kernel/spectre.c | 54 ++++++++++++++ arch/arm/mm/Kconfig | 1 + arch/arm/mm/proc-v7-bugs.c | 132 +++++++++++++++++++++++++-------- 5 files changed, 186 insertions(+), 31 deletions(-) create mode 100644 arch/arm/include/asm/spectre.h create mode 100644 arch/arm/kernel/spectre.c diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h new file mode 100644 index 000000000000..8a9019e08dba --- /dev/null +++ b/arch/arm/include/asm/spectre.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#ifndef __ASM_SPECTRE_H +#define __ASM_SPECTRE_H + +enum { + SPECTRE_UNAFFECTED, + SPECTRE_MITIGATED, + SPECTRE_VULNERABLE, +}; + +enum { + __SPECTRE_V2_METHOD_BPIALL, + __SPECTRE_V2_METHOD_ICIALLU, + __SPECTRE_V2_METHOD_SMC, + __SPECTRE_V2_METHOD_HVC, +}; + +enum { + SPECTRE_V2_METHOD_BPIALL = BIT(__SPECTRE_V2_METHOD_BPIALL), + SPECTRE_V2_METHOD_ICIALLU = BIT(__SPECTRE_V2_METHOD_ICIALLU), + SPECTRE_V2_METHOD_SMC = BIT(__SPECTRE_V2_METHOD_SMC), + SPECTRE_V2_METHOD_HVC = BIT(__SPECTRE_V2_METHOD_HVC), +}; + +void spectre_v2_update_state(unsigned int state, unsigned int methods); + +#endif diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 8cad59465af3..70fd896c132a 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -102,4 +102,6 @@ endif obj-$(CONFIG_HAVE_ARM_SMCCC) += smccc-call.o +obj-$(CONFIG_GENERIC_CPU_VULNERABILITIES) += spectre.o + extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c new file mode 100644 index 000000000000..6f6dd1cfd099 --- /dev/null +++ b/arch/arm/kernel/spectre.c @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include + +#include + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +static unsigned int spectre_v2_state; +static unsigned int spectre_v2_methods; + +void spectre_v2_update_state(unsigned int state, unsigned int method) +{ + if (state > spectre_v2_state) + spectre_v2_state = state; + spectre_v2_methods |= method; +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, + char *buf) +{ + const char *method; + + if (spectre_v2_state == SPECTRE_UNAFFECTED) + return sprintf(buf, "%s\n", "Not affected"); + + if (spectre_v2_state != SPECTRE_MITIGATED) + return sprintf(buf, "%s\n", "Vulnerable"); + + switch (spectre_v2_methods) { + case SPECTRE_V2_METHOD_BPIALL: + method = "Branch predictor hardening"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + method = "I-cache invalidation"; + break; + + case SPECTRE_V2_METHOD_SMC: + case SPECTRE_V2_METHOD_HVC: + method = "Firmware call"; + break; + + default: + method = "Multiple mitigations"; + break; + } + + return sprintf(buf, "Mitigation: %s\n", method); +} diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index b169e580bf82..2069a1b583f0 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -823,6 +823,7 @@ config CPU_BPREDICT_DISABLE config CPU_SPECTRE bool + select GENERIC_CPU_VULNERABILITIES config HARDEN_BRANCH_PREDICTOR bool "Harden the branch predictor against aliasing attacks" if EXPERT diff 
--git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index ac63a31347c1..7f9446c1f0cc 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -7,8 +7,36 @@ #include #include #include +#include #include +#ifdef CONFIG_ARM_PSCI +#define SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED 1 +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_1, &res); + + switch ((int)res.a0) { + case SMCCC_RET_SUCCESS: + return SPECTRE_MITIGATED; + + case SMCCC_ARCH_WORKAROUND_RET_UNAFFECTED: + return SPECTRE_UNAFFECTED; + + default: + return SPECTRE_VULNERABLE; + } +} +#else +static int __maybe_unused spectre_v2_get_cpu_fw_mitigation_state(void) +{ + return SPECTRE_VULNERABLE; +} +#endif + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR DEFINE_PER_CPU(harden_branch_predictor_fn_t, harden_branch_predictor_fn); @@ -37,13 +65,60 @@ static void __maybe_unused call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void cpu_v7_spectre_init(void) +static unsigned int spectre_v2_install_workaround(unsigned int method) { const char *spectre_v2_method = NULL; int cpu = smp_processor_id(); if (per_cpu(harden_branch_predictor_fn, cpu)) - return; + return SPECTRE_MITIGATED; + + switch (method) { + case SPECTRE_V2_METHOD_BPIALL: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_bpiall; + spectre_v2_method = "BPIALL"; + break; + + case SPECTRE_V2_METHOD_ICIALLU: + per_cpu(harden_branch_predictor_fn, cpu) = + harden_branch_predictor_iciallu; + spectre_v2_method = "ICIALLU"; + break; + + case SPECTRE_V2_METHOD_HVC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_hvc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; + spectre_v2_method = "hypervisor"; + break; + + case SPECTRE_V2_METHOD_SMC: + per_cpu(harden_branch_predictor_fn, cpu) = + call_smc_arch_workaround_1; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; + spectre_v2_method = "firmware"; + break; + } + + if (spectre_v2_method) + pr_info("CPU%u: Spectre v2: using %s workaround\n", + smp_processor_id(), spectre_v2_method); + + return SPECTRE_MITIGATED; +} +#else +static unsigned int spectre_v2_install_workaround(unsigned int method) +{ + pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n"); + + return SPECTRE_VULNERABLE; +} +#endif + +static void cpu_v7_spectre_v2_init(void) +{ + unsigned int state, method = 0; switch (read_cpuid_part()) { case ARM_CPU_PART_CORTEX_A8: @@ -52,32 +127,37 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - per_cpu(harden_branch_predictor_fn, cpu) = - harden_branch_predictor_bpiall; - spectre_v2_method = "BPIALL"; + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; break; case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - per_cpu(harden_branch_predictor_fn, cpu) = - harden_branch_predictor_iciallu; - spectre_v2_method = "ICIALLU"; + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_ICIALLU; break; -#ifdef CONFIG_ARM_PSCI case ARM_CPU_PART_BRAHMA_B53: /* Requires no workaround */ + state = SPECTRE_UNAFFECTED; break; + default: /* Other ARM CPUs require no workaround */ - if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) + if (read_cpuid_implementor() == ARM_CPU_IMP_ARM) { + state = SPECTRE_UNAFFECTED; break; + } /* fallthrough */ - /* Cortex A57/A72 require firmware workaround */ + /* Cortex A57/A72 
require firmware workaround */ case ARM_CPU_PART_CORTEX_A57: case ARM_CPU_PART_CORTEX_A72: { struct arm_smccc_res res; + state = spectre_v2_get_cpu_fw_mitigation_state(); + if (state != SPECTRE_MITIGATED) + break; + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) break; @@ -88,35 +168,25 @@ static void cpu_v7_spectre_init(void) switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: - per_cpu(harden_branch_predictor_fn, cpu) = - call_hvc_arch_workaround_1; - cpu_do_switch_mm = cpu_v7_hvc_switch_mm; - spectre_v2_method = "hypervisor"; + method = SPECTRE_V2_METHOD_HVC; break; case PSCI_CONDUIT_SMC: - per_cpu(harden_branch_predictor_fn, cpu) = - call_smc_arch_workaround_1; - cpu_do_switch_mm = cpu_v7_smc_switch_mm; - spectre_v2_method = "firmware"; + method = SPECTRE_V2_METHOD_SMC; break; default: + state = SPECTRE_VULNERABLE; break; } } -#endif } - if (spectre_v2_method) - pr_info("CPU%u: Spectre v2: using %s workaround\n", - smp_processor_id(), spectre_v2_method); -} -#else -static void cpu_v7_spectre_init(void) -{ + if (state == SPECTRE_MITIGATED) + state = spectre_v2_install_workaround(method); + + spectre_v2_update_state(state, method); } -#endif static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned, u32 mask, const char *msg) @@ -146,16 +216,16 @@ static bool check_spectre_auxcr(bool *warned, u32 bit) void cpu_v7_ca8_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(6))) - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); } void cpu_v7_ca15_ibe(void) { if (check_spectre_auxcr(this_cpu_ptr(&spectre_warned), BIT(0))) - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); } void cpu_v7_bugs_init(void) { - cpu_v7_spectre_init(); + cpu_v7_spectre_v2_init(); } -- Gitee From abcacb285da806e293dd7cf1653a0c159d438e9f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 28 Apr 2022 08:12:55 +0000 Subject: [PATCH 269/340] ARM: early traps initialisation stable inclusion from stable-v4.19.234 commit 45c25917ceb7a5377883ef4c3a675276fba8a268 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 04e91b7324760a377a725e218b5ee783826d30f5 upstream. Provide a couple of helpers to copy the vectors and stubs, and also to flush the copied vectors and stubs. 
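A hedged usage sketch (the calls below are taken from the later "ARM: Spectre-BHB workaround" patch in this series): factoring out the copy and flush steps lets an alternative vector table be installed the same way the default one is, e.g.

	copy_from_lma(vectors_page, vec_start, vec_end);
	flush_vectors(vectors_page, 0, vec_end - vec_start);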
Acked-by: Catalin Marinas Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/kernel/traps.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 2a21239aec91..674cf872cccc 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -828,10 +828,22 @@ static inline void __init kuser_init(void *vectors) } #endif +#ifndef CONFIG_CPU_V7M +static void copy_from_lma(void *vma, void *lma_start, void *lma_end) +{ + memcpy(vma, lma_start, lma_end - lma_start); +} + +static void flush_vectors(void *vma, size_t offset, size_t size) +{ + unsigned long start = (unsigned long)vma + offset; + unsigned long end = start + size; + + flush_icache_range(start, end); +} + void __init early_trap_init(void *vectors_base) { -#ifndef CONFIG_CPU_V7M - unsigned long vectors = (unsigned long)vectors_base; extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; unsigned i; @@ -852,17 +864,20 @@ void __init early_trap_init(void *vectors_base) * into the vector page, mapped at 0xffff0000, and ensure these * are visible to the instruction stream. */ - memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start); - memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start); + copy_from_lma(vectors_base, __vectors_start, __vectors_end); + copy_from_lma(vectors_base + 0x1000, __stubs_start, __stubs_end); kuser_init(vectors_base); - flush_icache_range(vectors, vectors + PAGE_SIZE * 2); + flush_vectors(vectors_base, 0, PAGE_SIZE * 2); +} #else /* ifndef CONFIG_CPU_V7M */ +void __init early_trap_init(void *vectors_base) +{ /* * on V7-M there is no need to copy the vector table to a dedicated * memory area. The address is configurable and so a table in the kernel * image can be used. */ -#endif } +#endif -- Gitee From 6700224fee3048474967b952cfdafe3087837394 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 28 Apr 2022 08:12:56 +0000 Subject: [PATCH 270/340] ARM: use LOADADDR() to get load address of sections stable inclusion from stable-v4.19.234 commit 67e1f18a972be16363c6e88d7b29cde880774164 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 8d9d651ff2270a632e9dc497b142db31e8911315 upstream. Use the linker's LOADADDR() macro to get the load address of the sections, and provide a macro to set the start and end symbols. Acked-by: Catalin Marinas Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/kernel/vmlinux.lds.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h index 8247bc15addc..64c052783514 100644 --- a/arch/arm/kernel/vmlinux.lds.h +++ b/arch/arm/kernel/vmlinux.lds.h @@ -25,6 +25,11 @@ #define ARM_MMU_DISCARD(x) x #endif +/* Set start/end symbol names to the LMA for the section */ +#define ARM_LMA(sym, section) \ + sym##_start = LOADADDR(section); \ + sym##_end = LOADADDR(section) + SIZEOF(section) + #define PROC_INFO \ . 
= ALIGN(4); \ __proc_info_begin = .; \ @@ -100,19 +105,19 @@ * only thing that matters is their relative offsets */ #define ARM_VECTORS \ - __vectors_start = .; \ + __vectors_lma = .; \ .vectors 0xffff0000 : AT(__vectors_start) { \ *(.vectors) \ } \ - . = __vectors_start + SIZEOF(.vectors); \ - __vectors_end = .; \ + ARM_LMA(__vectors, .vectors); \ + . = __vectors_lma + SIZEOF(.vectors); \ \ - __stubs_start = .; \ - .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_start) { \ + __stubs_lma = .; \ + .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) { \ *(.stubs) \ } \ - . = __stubs_start + SIZEOF(.stubs); \ - __stubs_end = .; \ + ARM_LMA(__stubs, .stubs); \ + . = __stubs_lma + SIZEOF(.stubs); \ \ PROVIDE(vector_fiq_offset = vector_fiq - ADDR(.vectors)); -- Gitee From 7d063048fddf5f15eb48399a4bfdc8957399919d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 28 Apr 2022 08:12:57 +0000 Subject: [PATCH 271/340] ARM: Spectre-BHB workaround stable inclusion from stable-v4.19.234 commit 99e14db3b711c27f93079ba9d7f2fff169916d5f category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit b9baf5c8c5c356757f4f9d8180b5e9d234065bc3 upstream. Workaround the Spectre BHB issues for Cortex-A15, Cortex-A57, Cortex-A72, Cortex-A73 and Cortex-A75. We also include Brahma B15 as well to be safe, which is affected by Spectre V2 in the same ways as Cortex-A15. Reviewed-by: Catalin Marinas Signed-off-by: Russell King (Oracle) [changes due to lack of SYSTEM_FREEING_INITMEM - gregkh] Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/include/asm/assembler.h | 10 ++++ arch/arm/include/asm/spectre.h | 4 ++ arch/arm/kernel/entry-armv.S | 79 +++++++++++++++++++++++++++++--- arch/arm/kernel/entry-common.S | 24 ++++++++++ arch/arm/kernel/spectre.c | 4 ++ arch/arm/kernel/traps.c | 38 +++++++++++++++ arch/arm/kernel/vmlinux.lds.h | 18 ++++++-- arch/arm/mm/Kconfig | 10 ++++ arch/arm/mm/proc-v7-bugs.c | 76 ++++++++++++++++++++++++++++++ 9 files changed, 254 insertions(+), 9 deletions(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 88286dd483ff..36ce2c1c982f 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -110,6 +110,16 @@ .endm #endif +#if __LINUX_ARM_ARCH__ < 7 + .macro dsb, args + mcr p15, 0, r0, c7, c10, 4 + .endm + + .macro isb, args + mcr p15, 0, r0, c7, r5, 4 + .endm +#endif + .macro asm_trace_hardirqs_off, save=1 #if defined(CONFIG_TRACE_IRQFLAGS) .if \save diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h index 8a9019e08dba..d1fa5607d3aa 100644 --- a/arch/arm/include/asm/spectre.h +++ b/arch/arm/include/asm/spectre.h @@ -14,6 +14,7 @@ enum { __SPECTRE_V2_METHOD_ICIALLU, __SPECTRE_V2_METHOD_SMC, __SPECTRE_V2_METHOD_HVC, + __SPECTRE_V2_METHOD_LOOP8, }; enum { @@ -21,8 +22,11 @@ enum { SPECTRE_V2_METHOD_ICIALLU = BIT(__SPECTRE_V2_METHOD_ICIALLU), SPECTRE_V2_METHOD_SMC = BIT(__SPECTRE_V2_METHOD_SMC), SPECTRE_V2_METHOD_HVC = BIT(__SPECTRE_V2_METHOD_HVC), + SPECTRE_V2_METHOD_LOOP8 = BIT(__SPECTRE_V2_METHOD_LOOP8), }; void spectre_v2_update_state(unsigned int state, unsigned int methods); +int spectre_bhb_update_vectors(unsigned int method); + #endif diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index e85a3af9ddeb..ffcff378e3c8 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1038,12 +1038,11 
@@ vector_\name: sub lr, lr, #\correction .endif - @ - @ Save r0, lr_ (parent PC) and spsr_ - @ (parent CPSR) - @ + @ Save r0, lr_ (parent PC) stmia sp, {r0, lr} @ save r0, lr - mrs lr, spsr + + @ Save spsr_ (parent CPSR) +2: mrs lr, spsr str lr, [sp, #8] @ save spsr @ @@ -1064,6 +1063,44 @@ vector_\name: movs pc, lr @ branch to handler in SVC mode ENDPROC(vector_\name) +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .subsection 1 + .align 5 +vector_bhb_loop8_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_ (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mov r0, #8 +1: b . + 4 + subs r0, r0, #1 + bne 1b + dsb + isb + b 2b +ENDPROC(vector_bhb_loop8_\name) + +vector_bhb_bpiall_\name: + .if \correction + sub lr, lr, #\correction + .endif + + @ Save r0, lr_ (parent PC) + stmia sp, {r0, lr} + + @ bhb workaround + mcr p15, 0, r0, c7, c5, 6 @ BPIALL + @ isb not needed due to "movs pc, lr" in the vector stub + @ which gives a "context synchronisation". + b 2b +ENDPROC(vector_bhb_bpiall_\name) + .previous +#endif + .align 2 @ handler addresses follow this label 1: @@ -1072,6 +1109,10 @@ ENDPROC(vector_\name) .section .stubs, "ax", %progbits @ This must be the first word .word vector_swi +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .word vector_bhb_loop8_swi + .word vector_bhb_bpiall_swi +#endif vector_rst: ARM( swi SYS_ERROR0 ) @@ -1186,8 +1227,10 @@ vector_addrexcptn: * FIQ "NMI" handler *----------------------------------------------------------------------------- * Handle a FIQ using the SVC stack allowing FIQ act like NMI on x86 - * systems. + * systems. This must be the last vector stub, so lets place it in its own + * subsection. */ + .subsection 2 vector_stub fiq, FIQ_MODE, 4 .long __fiq_usr @ 0 (USR_26 / USR_32) @@ -1220,6 +1263,30 @@ vector_addrexcptn: W(b) vector_irq W(b) vector_fiq +#ifdef CONFIG_HARDEN_BRANCH_HISTORY + .section .vectors.bhb.loop8, "ax", %progbits +.L__vectors_bhb_loop8_start: + W(b) vector_rst + W(b) vector_bhb_loop8_und + W(ldr) pc, .L__vectors_bhb_loop8_start + 0x1004 + W(b) vector_bhb_loop8_pabt + W(b) vector_bhb_loop8_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_loop8_irq + W(b) vector_bhb_loop8_fiq + + .section .vectors.bhb.bpiall, "ax", %progbits +.L__vectors_bhb_bpiall_start: + W(b) vector_rst + W(b) vector_bhb_bpiall_und + W(ldr) pc, .L__vectors_bhb_bpiall_start + 0x1008 + W(b) vector_bhb_bpiall_pabt + W(b) vector_bhb_bpiall_dabt + W(b) vector_addrexcptn + W(b) vector_bhb_bpiall_irq + W(b) vector_bhb_bpiall_fiq +#endif + .data .align 2 diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 0465d65d23de..e27fc2df5231 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -165,6 +165,29 @@ ENDPROC(ret_from_fork) *----------------------------------------------------------------------------- */ + .align 5 +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +ENTRY(vector_bhb_loop8_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mov r8, #8 +1: b 2f +2: subs r8, r8, #1 + bne 1b + dsb + isb + b 3f +ENDPROC(vector_bhb_loop8_swi) + + .align 5 +ENTRY(vector_bhb_bpiall_swi) + sub sp, sp, #PT_REGS_SIZE + stmia sp, {r0 - r12} + mcr p15, 0, r8, c7, c5, 6 @ BPIALL + isb + b 3f +ENDPROC(vector_bhb_bpiall_swi) +#endif .align 5 ENTRY(vector_swi) #ifdef CONFIG_CPU_V7M @@ -172,6 +195,7 @@ ENTRY(vector_swi) #else sub sp, sp, #PT_REGS_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 +3: ARM( add r8, sp, #S_PC ) ARM( stmdb r8, {sp, lr}^ ) @ Calling sp, lr THUMB( mov r8, sp ) diff --git 
a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c index 6f6dd1cfd099..ade967f18d06 100644 --- a/arch/arm/kernel/spectre.c +++ b/arch/arm/kernel/spectre.c @@ -45,6 +45,10 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, method = "Firmware call"; break; + case SPECTRE_V2_METHOD_LOOP8: + method = "History overwrite"; + break; + default: method = "Multiple mitigations"; break; diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 674cf872cccc..4c61ac4f3d35 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -842,6 +843,43 @@ static void flush_vectors(void *vma, size_t offset, size_t size) flush_icache_range(start, end); } +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +int spectre_bhb_update_vectors(unsigned int method) +{ + extern char __vectors_bhb_bpiall_start[], __vectors_bhb_bpiall_end[]; + extern char __vectors_bhb_loop8_start[], __vectors_bhb_loop8_end[]; + void *vec_start, *vec_end; + + if (system_state > SYSTEM_SCHEDULING) { + pr_err("CPU%u: Spectre BHB workaround too late - system vulnerable\n", + smp_processor_id()); + return SPECTRE_VULNERABLE; + } + + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + vec_start = __vectors_bhb_loop8_start; + vec_end = __vectors_bhb_loop8_end; + break; + + case SPECTRE_V2_METHOD_BPIALL: + vec_start = __vectors_bhb_bpiall_start; + vec_end = __vectors_bhb_bpiall_end; + break; + + default: + pr_err("CPU%u: unknown Spectre BHB state %d\n", + smp_processor_id(), method); + return SPECTRE_VULNERABLE; + } + + copy_from_lma(vectors_page, vec_start, vec_end); + flush_vectors(vectors_page, 0, vec_end - vec_start); + + return SPECTRE_MITIGATED; +} +#endif + void __init early_trap_init(void *vectors_base) { extern char __stubs_start[], __stubs_end[]; diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h index 64c052783514..99fb6fec623a 100644 --- a/arch/arm/kernel/vmlinux.lds.h +++ b/arch/arm/kernel/vmlinux.lds.h @@ -106,11 +106,23 @@ */ #define ARM_VECTORS \ __vectors_lma = .; \ - .vectors 0xffff0000 : AT(__vectors_start) { \ - *(.vectors) \ + OVERLAY 0xffff0000 : NOCROSSREFS AT(__vectors_lma) { \ + .vectors { \ + *(.vectors) \ + } \ + .vectors.bhb.loop8 { \ + *(.vectors.bhb.loop8) \ + } \ + .vectors.bhb.bpiall { \ + *(.vectors.bhb.bpiall) \ + } \ } \ ARM_LMA(__vectors, .vectors); \ - . = __vectors_lma + SIZEOF(.vectors); \ + ARM_LMA(__vectors_bhb_loop8, .vectors.bhb.loop8); \ + ARM_LMA(__vectors_bhb_bpiall, .vectors.bhb.bpiall); \ + . = __vectors_lma + SIZEOF(.vectors) + \ + SIZEOF(.vectors.bhb.loop8) + \ + SIZEOF(.vectors.bhb.bpiall); \ \ __stubs_lma = .; \ .stubs ADDR(.vectors) + 0x1000 : AT(__stubs_lma) { \ diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 2069a1b583f0..096afa16b531 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -844,6 +844,16 @@ config HARDEN_BRANCH_PREDICTOR If unsure, say Y. +config HARDEN_BRANCH_HISTORY + bool "Harden Spectre style attacks against branch history" if EXPERT + depends on CPU_SPECTRE + default y + help + Speculation attacks against some high-performance processors can + make use of branch history to influence future speculation. When + taking an exception, a sequence of branches overwrites the branch + history, or branch history is invalidated. 
+ config TLS_REG_EMUL bool select NEED_KUSER_HELPERS diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 7f9446c1f0cc..e7557b88a995 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -188,6 +188,81 @@ static void cpu_v7_spectre_v2_init(void) spectre_v2_update_state(state, method); } +#ifdef CONFIG_HARDEN_BRANCH_HISTORY +static int spectre_bhb_method; + +static const char *spectre_bhb_method_name(int method) +{ + switch (method) { + case SPECTRE_V2_METHOD_LOOP8: + return "loop"; + + case SPECTRE_V2_METHOD_BPIALL: + return "BPIALL"; + + default: + return "unknown"; + } +} + +static int spectre_bhb_install_workaround(int method) +{ + if (spectre_bhb_method != method) { + if (spectre_bhb_method) { + pr_err("CPU%u: Spectre BHB: method disagreement, system vulnerable\n", + smp_processor_id()); + + return SPECTRE_VULNERABLE; + } + + if (spectre_bhb_update_vectors(method) == SPECTRE_VULNERABLE) + return SPECTRE_VULNERABLE; + + spectre_bhb_method = method; + } + + pr_info("CPU%u: Spectre BHB: using %s workaround\n", + smp_processor_id(), spectre_bhb_method_name(method)); + + return SPECTRE_MITIGATED; +} +#else +static int spectre_bhb_install_workaround(int method) +{ + return SPECTRE_VULNERABLE; +} +#endif + +static void cpu_v7_spectre_bhb_init(void) +{ + unsigned int state, method = 0; + + switch (read_cpuid_part()) { + case ARM_CPU_PART_CORTEX_A15: + case ARM_CPU_PART_BRAHMA_B15: + case ARM_CPU_PART_CORTEX_A57: + case ARM_CPU_PART_CORTEX_A72: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_LOOP8; + break; + + case ARM_CPU_PART_CORTEX_A73: + case ARM_CPU_PART_CORTEX_A75: + state = SPECTRE_MITIGATED; + method = SPECTRE_V2_METHOD_BPIALL; + break; + + default: + state = SPECTRE_UNAFFECTED; + break; + } + + if (state == SPECTRE_MITIGATED) + state = spectre_bhb_install_workaround(method); + + spectre_v2_update_state(state, method); +} + static __maybe_unused bool cpu_v7_check_auxcr_set(bool *warned, u32 mask, const char *msg) { @@ -228,4 +303,5 @@ void cpu_v7_ca15_ibe(void) void cpu_v7_bugs_init(void) { cpu_v7_spectre_v2_init(); + cpu_v7_spectre_bhb_init(); } -- Gitee From a16c8564c6f75602c0b4a2d55179d6867c972c48 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 28 Apr 2022 08:12:58 +0000 Subject: [PATCH 272/340] ARM: include unprivileged BPF status in Spectre V2 reporting stable inclusion from stable-v4.19.234 commit 29db7e4b67fccf5e1fe28ec89f2add90ce74d77b category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 25875aa71dfefd1959f07e626c4d285b88b27ac2 upstream. The mitigations for Spectre-BHB are only applied when an exception is taken, but when unprivileged BPF is enabled, userspace can load BPF programs that can be used to exploit the problem. When unprivileged BPF is enabled, report the vulnerable status via the spectre_v2 sysfs file. 
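A hedged usage note (not part of the patch): with the sysctl kernel.unprivileged_bpf_disabled set to 0, the report added by this patch is expected to look like:

	# cat /sys/devices/system/cpu/vulnerabilities/spectre_v2
	Vulnerable: Unprivileged eBPF enabled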
Signed-off-by: Russell King (Oracle) Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/kernel/spectre.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c index ade967f18d06..e7fea962d632 100644 --- a/arch/arm/kernel/spectre.c +++ b/arch/arm/kernel/spectre.c @@ -1,9 +1,19 @@ // SPDX-License-Identifier: GPL-2.0-only +#include #include #include #include +static bool _unprivileged_ebpf_enabled(void) +{ +#ifdef CONFIG_BPF_SYSCALL + return !sysctl_unprivileged_bpf_disabled; +#else + return false +#endif +} + ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { @@ -31,6 +41,9 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, if (spectre_v2_state != SPECTRE_MITIGATED) return sprintf(buf, "%s\n", "Vulnerable"); + if (_unprivileged_ebpf_enabled()) + return sprintf(buf, "Vulnerable: Unprivileged eBPF enabled\n"); + switch (spectre_v2_methods) { case SPECTRE_V2_METHOD_BPIALL: method = "Branch predictor hardening"; -- Gitee From 8534f17effd5031faad823a2967249a7cca36aa4 Mon Sep 17 00:00:00 2001 From: Emmanuel Gil Peyrot Date: Thu, 28 Apr 2022 08:12:59 +0000 Subject: [PATCH 273/340] ARM: fix build error when BPF_SYSCALL is disabled stable inclusion from stable-v4.19.234 commit fffcf80d055c4f619ae9cd8bf3a8b67319317100 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 330f4c53d3c2d8b11d86ec03a964b86dc81452f5 upstream. It was missing a semicolon. Signed-off-by: Emmanuel Gil Peyrot Reviewed-by: Nathan Chancellor Fixes: 25875aa71dfe ("ARM: include unprivileged BPF status in Spectre V2 reporting"). Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/kernel/spectre.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/kernel/spectre.c b/arch/arm/kernel/spectre.c index e7fea962d632..0dcefc36fb7a 100644 --- a/arch/arm/kernel/spectre.c +++ b/arch/arm/kernel/spectre.c @@ -10,7 +10,7 @@ static bool _unprivileged_ebpf_enabled(void) #ifdef CONFIG_BPF_SYSCALL return !sysctl_unprivileged_bpf_disabled; #else - return false + return false; #endif } -- Gitee From eda81c50f279aeb5a35dfaa0e2daec8bae1abf6f Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 28 Apr 2022 08:13:00 +0000 Subject: [PATCH 274/340] ARM: fix co-processor register typo stable inclusion from stable-v4.19.234 commit 8dd98efe9e3f20927a2970bed5974ca8c3646c92 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 33970b031dc4653cc9dc80f2886976706c4c8ef1 upstream. 
In the recent Spectre BHB patches, there was a typo that is only exposed in certain configurations: mcr p15,0,XX,c7,r5,4 should have been mcr p15,0,XX,c7,c5,4 Reported-by: kernel test robot Fixes: b9baf5c8c5c3 ("ARM: Spectre-BHB workaround") Signed-off-by: Russell King (Oracle) Acked-by: Catalin Marinas Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/include/asm/assembler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 36ce2c1c982f..f903e1040b36 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -116,7 +116,7 @@ .endm .macro isb, args - mcr p15, 0, r0, c7, r5, 4 + mcr p15, 0, r0, c7, c5, 4 .endm #endif -- Gitee From d4d9408b275f034e2a7d392f81ea83ec755079c1 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 28 Apr 2022 08:13:01 +0000 Subject: [PATCH 275/340] ARM: Do not use NOCROSSREFS directive with ld.lld stable inclusion from stable-v4.19.234 commit 868d5f8a428f41da7f493a6744913b55c5c97ee2 category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 36168e387fa7d0f1fe0cd5cf76c8cea7aee714fa upstream. ld.lld does not support the NOCROSSREFS directive at the moment, which breaks the build after commit b9baf5c8c5c3 ("ARM: Spectre-BHB workaround"): ld.lld: error: ./arch/arm/kernel/vmlinux.lds:34: AT expected, but got NOCROSSREFS Support for this directive will eventually be implemented, at which point a version check can be added. To avoid breaking the build in the meantime, just define NOCROSSREFS to nothing when using ld.lld, with a link to the issue for tracking. Cc: stable@vger.kernel.org Fixes: b9baf5c8c5c3 ("ARM: Spectre-BHB workaround") Link: https://github.com/ClangBuiltLinux/linux/issues/1609 Signed-off-by: Nathan Chancellor Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/kernel/vmlinux.lds.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/arch/arm/kernel/vmlinux.lds.h b/arch/arm/kernel/vmlinux.lds.h index 99fb6fec623a..78d156e4f008 100644 --- a/arch/arm/kernel/vmlinux.lds.h +++ b/arch/arm/kernel/vmlinux.lds.h @@ -25,6 +25,14 @@ #define ARM_MMU_DISCARD(x) x #endif +/* + * ld.lld does not support NOCROSSREFS: + * https://github.com/ClangBuiltLinux/linux/issues/1609 + */ +#ifdef CONFIG_LD_IS_LLD +#define NOCROSSREFS +#endif + /* Set start/end symbol names to the LMA for the section */ #define ARM_LMA(sym, section) \ sym##_start = LOADADDR(section); \ -- Gitee From d61ef7e2bb00d95d9380a819b14a5e756b6bed42 Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 28 Apr 2022 08:13:02 +0000 Subject: [PATCH 276/340] ARM: fix build warning in proc-v7-bugs.c stable inclusion from stable-v4.19.234 commit 623ca876d4a91665b7d948e399f5776c713722ea category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit b1a384d2cbccb1eb3f84765020d25e2c1929706e upstream. The kernel test robot discovered that building without HARDEN_BRANCH_PREDICTOR issues a warning due to a missing argument to pr_info(). Add the missing argument. 
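A hedged illustration of why the missing argument matters (not from the original commit text): the %u conversion makes vsnprintf() fetch a variadic argument that was never passed, so the printed CPU number is whatever happens to be in that register or stack slot. The fixed call simply supplies it:

	pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n",
		smp_processor_id());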
Reported-by: kernel test robot Fixes: 9dd78194a372 ("ARM: report Spectre v2 status through sysfs") Signed-off-by: Russell King (Oracle) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/mm/proc-v7-bugs.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index e7557b88a995..87a93b76e148 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -110,7 +110,8 @@ static unsigned int spectre_v2_install_workaround(unsigned int method) #else static unsigned int spectre_v2_install_workaround(unsigned int method) { - pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n"); + pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n", + smp_processor_id()); return SPECTRE_VULNERABLE; } -- Gitee From f22ff10051761324031719ee2374881e896df1f6 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 28 Apr 2022 08:13:03 +0000 Subject: [PATCH 277/340] ARM: Spectre-BHB: provide empty stub for non-config stable inclusion from stable-v4.19.235 commit d60f27d6ff8a8d28441b44cd2e8a2fda64c99aac category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 68453767131a5deec1e8f9ac92a9042f929e585d upstream. When CONFIG_GENERIC_CPU_VULNERABILITIES is not set, references to spectre_v2_update_state() cause a build error, so provide an empty stub for that function when the Kconfig option is not set. Fixes this build error: arm-linux-gnueabi-ld: arch/arm/mm/proc-v7-bugs.o: in function `cpu_v7_bugs_init': proc-v7-bugs.c:(.text+0x52): undefined reference to `spectre_v2_update_state' arm-linux-gnueabi-ld: proc-v7-bugs.c:(.text+0x82): undefined reference to `spectre_v2_update_state' Fixes: b9baf5c8c5c3 ("ARM: Spectre-BHB workaround") Signed-off-by: Randy Dunlap Reported-by: kernel test robot Cc: Russell King Cc: Catalin Marinas Cc: linux-arm-kernel@lists.infradead.org Cc: patches@armlinux.org.uk Acked-by: Russell King (Oracle) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/include/asm/spectre.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm/include/asm/spectre.h b/arch/arm/include/asm/spectre.h index d1fa5607d3aa..85f9e538fb32 100644 --- a/arch/arm/include/asm/spectre.h +++ b/arch/arm/include/asm/spectre.h @@ -25,7 +25,13 @@ enum { SPECTRE_V2_METHOD_LOOP8 = BIT(__SPECTRE_V2_METHOD_LOOP8), }; +#ifdef CONFIG_GENERIC_CPU_VULNERABILITIES void spectre_v2_update_state(unsigned int state, unsigned int methods); +#else +static inline void spectre_v2_update_state(unsigned int state, + unsigned int methods) +{} +#endif int spectre_bhb_update_vectors(unsigned int method); -- Gitee From f442b1f0811c0da01e8cf709c65c556575206b1d Mon Sep 17 00:00:00 2001 From: "Russell King (Oracle)" Date: Thu, 28 Apr 2022 08:13:04 +0000 Subject: [PATCH 278/340] ARM: fix Thumb2 regression with Spectre BHB stable inclusion from stable-v4.19.235 commit 691f354689e94571853c79ac3d7d6e7f27dfb87b category: bugfix bugzilla: 186460, https://gitee.com/src-openeuler/kernel/issues/I53MHA CVE: CVE-2022-23960 -------------------------------- commit 6c7cb60bff7aec24b834343ff433125f469886a3 upstream. When building for Thumb2, the vectors make use of a local label. 
Sadly, the Spectre BHB code also uses a local label with the same number which results in the Thumb2 reference pointing at the wrong place. Fix this by changing the number used for the Spectre BHB local label. Fixes: b9baf5c8c5c3 ("ARM: Spectre-BHB workaround") Tested-by: Nathan Chancellor Signed-off-by: Russell King (Oracle) Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Reviewed-by: Xiu Jianfeng Reviewed-by: Liao Chang Signed-off-by: Yongqiang Liu --- arch/arm/kernel/entry-armv.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index ffcff378e3c8..c2d36d31e908 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -1076,9 +1076,9 @@ vector_bhb_loop8_\name: @ bhb workaround mov r0, #8 -1: b . + 4 +3: b . + 4 subs r0, r0, #1 - bne 1b + bne 3b dsb isb b 2b -- Gitee From f63bceddbb8f863c821c4ba2770636125b27cd45 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Apr 2022 03:33:43 +0000 Subject: [PATCH 279/340] llc: fix netdevice reference leaks in llc_ui_bind() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v4.19.237 commit d14193111c436fc5de33206c67c7afd45c730099 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I51YBN CVE: CVE-2022-28356 ------------------------------------------------- commit 764f4eb6846f5475f1244767d24d25dd86528a4a upstream. Whenever llc_ui_bind() and/or llc_ui_autobind() took a reference on a netdevice but subsequently fail, they must properly release their reference or risk the infamous message from unregister_netdevice() at device dismantle. unregister_netdevice: waiting for eth0 to become free. Usage count = 3 Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Dumazet Reported-by: 赵子轩 Reported-by: Stoyan Manolov Link: https://lore.kernel.org/r/20220323004147.1990845-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Huang Guobin Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/llc/af_llc.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index fa0f3c1543ba..239dce060872 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -303,6 +303,10 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; out: + if (rc) { + dev_put(llc->dev); + llc->dev = NULL; + } return rc; } @@ -401,6 +405,10 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: + if (rc) { + dev_put(llc->dev); + llc->dev = NULL; + } release_sock(sk); return rc; } -- Gitee From ca0c52d1c8c6317e33a2c574f7ca177902c7d049 Mon Sep 17 00:00:00 2001 From: Yajun Deng Date: Fri, 29 Apr 2022 03:33:44 +0000 Subject: [PATCH 280/340] netdevice: add the case if dev is NULL stable inclusion from stable-v4.19.238 commit 574a12b4445076a04499962ce8b8ae15f75a7955 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I51YBN CVE: CVE-2022-28356 ------------------------------------------------- commit b37a466837393af72fe8bcb8f1436410f3f173f3 upstream. Add the case if dev is NULL in dev_{put, hold}, so the caller doesn't need to care whether dev is NULL or not. Signed-off-by: Yajun Deng Signed-off-by: David S. 
Miller Cc: Pavel Machek Signed-off-by: Greg Kroah-Hartman Signed-off-by: Huang Guobin Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- include/linux/netdevice.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9a7d7e6304f4..752df39590ba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3790,7 +3790,8 @@ void netdev_run_todo(void); */ static inline void dev_put(struct net_device *dev) { - this_cpu_dec(*dev->pcpu_refcnt); + if (dev) + this_cpu_dec(*dev->pcpu_refcnt); } /** @@ -3801,7 +3802,8 @@ static inline void dev_put(struct net_device *dev) */ static inline void dev_hold(struct net_device *dev) { - this_cpu_inc(*dev->pcpu_refcnt); + if (dev) + this_cpu_inc(*dev->pcpu_refcnt); } /* Carrier loss detection, dial on demand. The functions netif_carrier_on -- Gitee From b694ae58e996c1e29d23a0e0e1c1e042954911cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 29 Apr 2022 03:33:45 +0000 Subject: [PATCH 281/340] llc: only change llc->dev when bind() succeeds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from stable-v4.19.237 commit c106f9aa6cd8913af9188ad361899ae696b5de37 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I51YBN CVE: CVE-2022-28356 ------------------------------------------------- commit 2d327a79ee176930dc72c131a970c891d367c1dc upstream. My latest patch, attempting to fix the refcount leak in a minimal way turned out to add a new bug. Whenever the bind operation fails before we attempt to grab a reference count on a device, we might release the device refcount of a prior successful bind() operation. syzbot was not happy about this [1]. Note to stable teams: Make sure commit b37a46683739 ("netdevice: add the case if dev is NULL") is already present in your trees. 
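Before the syzbot splat quoted below at [1], here is a condensed sketch of the corrected flow that the llc hunks further down implement: work on a local net_device pointer, take the reference on that local pointer, and only commit it to llc->dev once nothing can fail any more. Names are simplified and error handling is elided; this is not the exact kernel code. The unconditional dev_put() on the exit path relies on dev_put(NULL) being a no-op, which is exactly why the netdevice patch above is a prerequisite.

	struct net_device *dev = NULL;

	dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if);	/* takes a reference */
	if (!dev)
		goto out;

	/* ... every check that may still fail goes here ... */

	/* Note: We do not expect errors from this point. */
	llc->dev = dev;		/* ownership moves to the socket */
	dev = NULL;
	rc = 0;
out:
	dev_put(dev);		/* no-op when dev is still NULL or already handed over */
	return rc;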
[1] general protection fault, probably for non-canonical address 0xdffffc0000000070: 0000 [#1] PREEMPT SMP KASAN KASAN: null-ptr-deref in range [0x0000000000000380-0x0000000000000387] CPU: 1 PID: 3590 Comm: syz-executor361 Tainted: G W 5.17.0-syzkaller-04796-g169e77764adc #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:llc_ui_connect+0x400/0xcb0 net/llc/af_llc.c:500 Code: 80 3c 02 00 0f 85 fc 07 00 00 4c 8b a5 38 05 00 00 48 b8 00 00 00 00 00 fc ff df 49 8d bc 24 80 03 00 00 48 89 fa 48 c1 ea 03 <80> 3c 02 00 0f 85 a9 07 00 00 49 8b b4 24 80 03 00 00 4c 89 f2 48 RSP: 0018:ffffc900038cfcc0 EFLAGS: 00010202 RAX: dffffc0000000000 RBX: ffff8880756eb600 RCX: 0000000000000000 RDX: 0000000000000070 RSI: ffffc900038cfe3e RDI: 0000000000000380 RBP: ffff888015ee5000 R08: 0000000000000001 R09: ffff888015ee5535 R10: ffffed1002bdcaa6 R11: 0000000000000000 R12: 0000000000000000 R13: ffffc900038cfe37 R14: ffffc900038cfe38 R15: ffff888015ee5012 FS: 0000555555acd300(0000) GS:ffff8880b9d00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000280 CR3: 0000000077db6000 CR4: 00000000003506e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: __sys_connect_file+0x155/0x1a0 net/socket.c:1900 __sys_connect+0x161/0x190 net/socket.c:1917 __do_sys_connect net/socket.c:1927 [inline] __se_sys_connect net/socket.c:1924 [inline] __x64_sys_connect+0x6f/0xb0 net/socket.c:1924 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f016acb90b9 Code: 28 c3 e8 2a 14 00 00 66 2e 0f 1f 84 00 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffd417947f8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f016acb90b9 RDX: 0000000000000010 RSI: 0000000020000140 RDI: 0000000000000003 RBP: 00007f016ac7d0a0 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007f016ac7d130 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000 Modules linked in: ---[ end trace 0000000000000000 ]--- RIP: 0010:llc_ui_connect+0x400/0xcb0 net/llc/af_llc.c:500 Fixes: 764f4eb6846f ("llc: fix netdevice reference leaks in llc_ui_bind()") Signed-off-by: Eric Dumazet Reported-by: syzbot Cc: 赵子轩 Cc: Stoyan Manolov Link: https://lore.kernel.org/r/20220325035827.360418-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Huang Guobin Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/llc/af_llc.c | 57 +++++++++++++++++++++++++++--------------------- 1 file changed, 32 insertions(+), 25 deletions(-) diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 239dce060872..d23cd51efc5f 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -268,6 +268,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) { struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; @@ -279,14 +280,14 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) goto out; rc = -ENODEV; if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); - if 
(llc->dev && addr->sllc_arphrd != llc->dev->type) { - dev_put(llc->dev); - llc->dev = NULL; + dev = dev_get_by_index(&init_net, sk->sk_bound_dev_if); + if (dev && addr->sllc_arphrd != dev->type) { + dev_put(dev); + dev = NULL; } } else - llc->dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); - if (!llc->dev) + dev = dev_getfirstbyhwtype(&init_net, addr->sllc_arphrd); + if (!dev) goto out; rc = -EUSERS; llc->laddr.lsap = llc_ui_autoport(); @@ -296,6 +297,11 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sap = llc_sap_open(llc->laddr.lsap, NULL); if (!sap) goto out; + + /* Note: We do not expect errors from this point. */ + llc->dev = dev; + dev = NULL; + memcpy(llc->laddr.mac, llc->dev->dev_addr, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); /* assign new connection to its SAP */ @@ -303,10 +309,7 @@ static int llc_ui_autobind(struct socket *sock, struct sockaddr_llc *addr) sock_reset_flag(sk, SOCK_ZAPPED); rc = 0; out: - if (rc) { - dev_put(llc->dev); - llc->dev = NULL; - } + dev_put(dev); return rc; } @@ -329,6 +332,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) struct sockaddr_llc *addr = (struct sockaddr_llc *)uaddr; struct sock *sk = sock->sk; struct llc_sock *llc = llc_sk(sk); + struct net_device *dev = NULL; struct llc_sap *sap; int rc = -EINVAL; @@ -344,25 +348,26 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) rc = -ENODEV; rcu_read_lock(); if (sk->sk_bound_dev_if) { - llc->dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); - if (llc->dev) { + dev = dev_get_by_index_rcu(&init_net, sk->sk_bound_dev_if); + if (dev) { if (is_zero_ether_addr(addr->sllc_mac)) - memcpy(addr->sllc_mac, llc->dev->dev_addr, + memcpy(addr->sllc_mac, dev->dev_addr, IFHWADDRLEN); - if (addr->sllc_arphrd != llc->dev->type || + if (addr->sllc_arphrd != dev->type || !ether_addr_equal(addr->sllc_mac, - llc->dev->dev_addr)) { + dev->dev_addr)) { rc = -EINVAL; - llc->dev = NULL; + dev = NULL; } } - } else - llc->dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, + } else { + dev = dev_getbyhwaddr_rcu(&init_net, addr->sllc_arphrd, addr->sllc_mac); - if (llc->dev) - dev_hold(llc->dev); + } + if (dev) + dev_hold(dev); rcu_read_unlock(); - if (!llc->dev) + if (!dev) goto out; if (!addr->sllc_sap) { rc = -EUSERS; @@ -395,6 +400,11 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) goto out_put; } } + + /* Note: We do not expect errors from this point. */ + llc->dev = dev; + dev = NULL; + llc->laddr.lsap = addr->sllc_sap; memcpy(llc->laddr.mac, addr->sllc_mac, IFHWADDRLEN); memcpy(&llc->addr, addr, sizeof(llc->addr)); @@ -405,10 +415,7 @@ static int llc_ui_bind(struct socket *sock, struct sockaddr *uaddr, int addrlen) out_put: llc_sap_put(sap); out: - if (rc) { - dev_put(llc->dev); - llc->dev = NULL; - } + dev_put(dev); release_sock(sk); return rc; } -- Gitee From 7883a93a82b98383e15046dca2aa3ae35bc653ec Mon Sep 17 00:00:00 2001 From: Hangyu Hua Date: Fri, 29 Apr 2022 03:33:46 +0000 Subject: [PATCH 282/340] can: mcba_usb: mcba_usb_start_xmit(): fix double dev_kfree_skb in error path stable inclusion from stable-v4.19.238 commit a8bba9fd73775e66b4021b18f2193f769ce48a59 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I51YBN CVE: CVE-2022-28389 ------------------------------------------------- commit 04c9b00ba83594a29813d6b1fb8fdc93a3915174 upstream. 
There is no need to call dev_kfree_skb() when usb_submit_urb() fails because can_put_echo_skb() deletes original skb and can_free_echo_skb() deletes the cloned skb. Fixes: 51f3baad7de9 ("can: mcba_usb: Add support for Microchip CAN BUS Analyzer") Link: https://lore.kernel.org/all/20220311080208.45047-1-hbh25y@gmail.com Signed-off-by: Hangyu Hua Signed-off-by: Marc Kleine-Budde Signed-off-by: Greg Kroah-Hartman Signed-off-by: Huang Guobin Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/net/can/usb/mcba_usb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/can/usb/mcba_usb.c b/drivers/net/can/usb/mcba_usb.c index 1b0afeaf1a3c..25fcec5125ed 100644 --- a/drivers/net/can/usb/mcba_usb.c +++ b/drivers/net/can/usb/mcba_usb.c @@ -377,7 +377,6 @@ static netdev_tx_t mcba_usb_start_xmit(struct sk_buff *skb, xmit_failed: can_free_echo_skb(priv->netdev, ctx->ndx); mcba_usb_free_ctx(ctx); - dev_kfree_skb(skb); stats->tx_dropped++; return NETDEV_TX_OK; -- Gitee From 7b9501c21752e4993e39280273db1f8ffdbc4498 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 29 Apr 2022 03:33:47 +0000 Subject: [PATCH 283/340] hamradio: defer ax25 kfree after unregister_netdev stable inclusion from stable-v4.19.223 commit 896193a02a2981e60c40d4614fd095ce92135ccd category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I55483 CVE: CVE-2022-1195 ------------------------------------------------- commit 3e0588c291d6ce225f2b891753ca41d45ba42469 upstream. There is a possible race condition (use-after-free) like below (USE) | (FREE) ax25_sendmsg | ax25_queue_xmit | dev_queue_xmit | __dev_queue_xmit | __dev_xmit_skb | sch_direct_xmit | ... xmit_one | netdev_start_xmit | tty_ldisc_kill __netdev_start_xmit | mkiss_close ax_xmit | kfree ax_encaps | | Even though there are two synchronization primitives before the kfree: 1. wait_for_completion(&ax->dead). This can prevent the race with routines from mkiss_ioctl. However, it cannot stop the routine coming from upper layer, i.e., the ax25_sendmsg. 2. netif_stop_queue(ax->dev). It seems that this line of code aims to halt the transmit queue but it fails to stop the routine that already being xmit. This patch reorder the kfree after the unregister_netdev to avoid the possible UAF as the unregister_netdev() is well synchronized and won't return if there is a running routine. Signed-off-by: Lin Ma Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Huang Guobin Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/net/hamradio/mkiss.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index 3b14e6e281d4..cc977d0cb2d9 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -803,13 +803,13 @@ static void mkiss_close(struct tty_struct *tty) */ netif_stop_queue(ax->dev); - /* Free all AX25 frame buffers. */ - kfree(ax->rbuff); - kfree(ax->xbuff); - ax->tty = NULL; unregister_netdev(ax->dev); + + /* Free all AX25 frame buffers. */ + kfree(ax->rbuff); + kfree(ax->xbuff); } /* Perform I/O control on an active ax25 channel. 
*/ -- Gitee From c46a8e3369dfc7231551ce48cf0039d4836ad9a8 Mon Sep 17 00:00:00 2001 From: Lin Ma Date: Fri, 29 Apr 2022 03:33:48 +0000 Subject: [PATCH 284/340] hamradio: improve the incomplete fix to avoid NPD stable inclusion from stable-v4.19.223 commit b68f41c6320b2b7fbb54a95f07a69f3dc7e56c59 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I55483 CVE: CVE-2022-1195 ------------------------------------------------- commit b2f37aead1b82a770c48b5d583f35ec22aabb61e upstream. The previous commit 3e0588c291d6 ("hamradio: defer ax25 kfree after unregister_netdev") reorder the kfree operations and unregister_netdev operation to prevent UAF. This commit improves the previous one by also deferring the nullify of the ax->tty pointer. Otherwise, a NULL pointer dereference bug occurs. Partial of the stack trace is shown below. BUG: kernel NULL pointer dereference, address: 0000000000000538 RIP: 0010:ax_xmit+0x1f9/0x400 ... Call Trace: dev_hard_start_xmit+0xec/0x320 sch_direct_xmit+0xea/0x240 __qdisc_run+0x166/0x5c0 __dev_queue_xmit+0x2c7/0xaf0 ax25_std_establish_data_link+0x59/0x60 ax25_connect+0x3a0/0x500 ? security_socket_connect+0x2b/0x40 __sys_connect+0x96/0xc0 ? __hrtimer_init+0xc0/0xc0 ? common_nsleep+0x2e/0x50 ? switch_fpu_return+0x139/0x1a0 __x64_sys_connect+0x11/0x20 do_syscall_64+0x33/0x40 entry_SYSCALL_64_after_hwframe+0x44/0xa9 The crash point is shown as below static void ax_encaps(...) { ... set_bit(TTY_DO_WRITE_WAKEUP, &ax->tty->flags); // ax->tty = NULL! ... } By placing the nullify action after the unregister_netdev, the ax->tty pointer won't be assigned as NULL net_device framework layer is well synchronized. Signed-off-by: Lin Ma Signed-off-by: David S. Miller Signed-off-by: Greg Kroah-Hartman Signed-off-by: Huang Guobin Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/net/hamradio/mkiss.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/hamradio/mkiss.c b/drivers/net/hamradio/mkiss.c index cc977d0cb2d9..a6311310dcaa 100644 --- a/drivers/net/hamradio/mkiss.c +++ b/drivers/net/hamradio/mkiss.c @@ -803,13 +803,13 @@ static void mkiss_close(struct tty_struct *tty) */ netif_stop_queue(ax->dev); - ax->tty = NULL; - unregister_netdev(ax->dev); /* Free all AX25 frame buffers. */ kfree(ax->rbuff); kfree(ax->xbuff); + + ax->tty = NULL; } /* Perform I/O control on an active ax25 channel. */ -- Gitee From 9f36ea852c9a5aeedc38a0d5ebcb1851965bf1b4 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Fri, 6 May 2022 12:14:39 +0000 Subject: [PATCH 285/340] mm: Show correct reliable pagecache size hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- Show corrent reliable pagecache size if /proc/meminfo if memory reliable and pagecache use reliable memory is enabled. 
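As a side note on the unit handling in the hunk below: the per-cpu counter holds a number of pages, while /proc/meminfo reports kB, so the value has to be shifted by (PAGE_SHIFT - 10). A minimal sketch of the conversion, assuming the usual 4 KiB pages (PAGE_SHIFT == 12); the helper name is made up for illustration:

	/* pages_to_kb(1000) == 4000 on a 4 KiB-page kernel: 2^12 / 2^10 = 4 */
	static inline u64 pages_to_kb(u64 pages)
	{
		return pages << (PAGE_SHIFT - 10);
	}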
Fixes: 9f07eb30e624 ("mm: Add space after ReliableFileCache") Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/mem_reliable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 646c27993df4..bca569412685 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -231,7 +231,7 @@ void reliable_report_meminfo(struct seq_file *m) num += global_node_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE); show_val_kb(m, "FileCache: ", num); seq_printf(m, "ReliableFileCache: %8llu kB\n", - nr_pagecache_pages); + nr_pagecache_pages << (PAGE_SHIFT - 10)); } } -- Gitee From 88b64c32007e927a9bdb3be89e6a8d2b73d98e28 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Fri, 6 May 2022 12:14:40 +0000 Subject: [PATCH 286/340] mm: Fix reliable task used problem shown in meminfo hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- Previous reliable task used does not contains reliable pagecache pages if pagecache use reliable memory feature is disabled. This will lead to incorrect result because reliable task's pagecache is allocated from mirrored region. With this patch, reliable task used will always contains reliable anon pages and reliable pagecache pages. Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/mem_reliable.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index bca569412685..82a0486ea697 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -196,7 +196,6 @@ static void show_val_kb(struct seq_file *m, const char *s, unsigned long num) void reliable_report_meminfo(struct seq_file *m) { - bool pagecache_enabled = pagecache_reliable_is_enabled(); s64 nr_pagecache_pages = 0; s64 nr_anon_pages = 0; long nr_buddy_pages = 0; @@ -209,8 +208,7 @@ void reliable_report_meminfo(struct seq_file *m) nr_buddy_pages += per_cpu(nr_reliable_buddy_pages, cpu); nr_anon_pages = percpu_counter_sum_positive(&anon_reliable_pages); - if (pagecache_enabled) - nr_pagecache_pages = percpu_counter_sum_positive(&pagecache_reliable_pages); + nr_pagecache_pages = percpu_counter_sum_positive(&pagecache_reliable_pages); show_val_kb(m, "ReliableTotal: ", total_reliable_mem_sz() >> PAGE_SHIFT); @@ -224,7 +222,7 @@ void reliable_report_meminfo(struct seq_file *m) percpu_counter_sum(&reliable_shmem_used_nr_page)); } - if (pagecache_enabled) { + if (pagecache_reliable_is_enabled()) { unsigned long num = 0; num += global_node_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE); -- Gitee From 01f7c388acbd26782b61be8c2fe8a5ede6820281 Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Fri, 6 May 2022 12:14:41 +0000 Subject: [PATCH 287/340] mm: Add more debug info if oom occurs hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- Add more debug info about memory reliable if oom occurs. 
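To give an idea of what the extra debug output means in practice, an OOM report on a kernel with memory reliable enabled would additionally carry lines of roughly the following shape once the hunk below is applied; the numbers here are purely illustrative, and the shmem and file-cache lines only appear when those sub-features are enabled:

	ReliableTotal: 16777216 kB
	ReliableUsed: 2097152 kB
	ReliableTaskLimit: 8388608 kB
	ReliableTaskUsed: 1048576 kB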
Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/mem_reliable.c | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/mm/mem_reliable.c b/mm/mem_reliable.c index 82a0486ea697..f8b8543d7933 100644 --- a/mm/mem_reliable.c +++ b/mm/mem_reliable.c @@ -490,18 +490,40 @@ static void mem_reliable_feature_disable(int idx) void reliable_show_mem_info(void) { - s64 num = 0; + s64 nr_pagecache_pages = 0; + s64 nr_anon_pages = 0; if (!mem_reliable_is_enabled()) return; - num += percpu_counter_sum_positive(&anon_reliable_pages); - num += percpu_counter_sum_positive(&pagecache_reliable_pages); + nr_anon_pages = percpu_counter_sum_positive(&anon_reliable_pages); + nr_pagecache_pages = percpu_counter_sum_positive(&pagecache_reliable_pages); + + pr_info("ReliableTotal: %lu kB\n", total_reliable_mem_sz() >> 10); + pr_info("ReliableUsed: %lu kB\n", used_reliable_mem_sz() >> 10); + pr_info("ReliableTaskLimit: %lu kB\n", task_reliable_limit >> 10); + pr_info("ReliableTaskUsed: %lld kB\n", + (nr_anon_pages + nr_pagecache_pages) << (PAGE_SHIFT - 10)); + + if (shmem_reliable_is_enabled()) { + pr_info("ReliableShmemPagesLimit: %ld\n", + shmem_reliable_nr_page); + pr_info("ReliableShmem: %llu kB\n", + percpu_counter_sum(&reliable_shmem_used_nr_page) + << (PAGE_SHIFT - 10)); + } + + if (pagecache_reliable_is_enabled()) { + unsigned long num = 0; - pr_info("ReliableTotal: %lu kB", total_reliable_mem_sz() >> 10); - pr_info("ReliableUsed: %lu kB", used_reliable_mem_sz() >> 10); - pr_info("task_reliable_limit: %lu kB", task_reliable_limit >> 10); - pr_info("reliable_user_used: %lld kB", num << (PAGE_SHIFT - 10)); + num += global_node_page_state(NR_LRU_BASE + LRU_ACTIVE_FILE); + num += global_node_page_state(NR_LRU_BASE + LRU_INACTIVE_FILE); + pr_info("ReliableFileCacheLimit: %lu kB\n", + reliable_pagecache_max_bytes >> 10); + pr_info("FileCache: %lu kB\n", num << (PAGE_SHIFT - 10)); + pr_info("ReliableFileCache: %llu kB\n", + nr_pagecache_pages << (PAGE_SHIFT - 10)); + } } void mem_reliable_out_of_memory(gfp_t gfp_mask, unsigned int order, -- Gitee From 74ef98cdaa031ef0539a005b2cef3de31a3ebca5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 6 May 2022 12:14:42 +0000 Subject: [PATCH 288/340] x86/asm: Pin sensitive CR4 bits mainline inclusion from mainline-v5.3-rc1 commit 873d50d58f67ef15d2777b5e7f7a5268bb1fbae2 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I563FS CVE: NA -------------------------------- Several recent exploits have used direct calls to the native_write_cr4() function to disable SMEP and SMAP before then continuing their exploits using userspace memory access. Direct calls of this form can be mitigate by pinning bits of CR4 so that they cannot be changed through a common function. This is not intended to be a general ROP protection (which would require CFI to defend against properly), but rather a way to avoid trivial direct function calling (or CFI bypasses via a matching function prototype) as seen in: https://googleprojectzero.blogspot.com/2017/05/exploiting-linux-kernel-via-packet.html (https://github.com/xairy/kernel-exploits/tree/master/CVE-2017-7308) The goals of this change: - Pin specific bits (SMEP, SMAP, and UMIP) when writing CR4. - Avoid setting the bits too early (they must become pinned only after CPU feature detection and selection has finished). - Pinning mask needs to be read-only during normal runtime. 
- Pinning needs to be checked after write to validate the cr4 state Using __ro_after_init on the mask is done so it can't be first disabled with a malicious write. Since these bits are global state (once established by the boot CPU and kernel boot parameters), they are safe to write to secondary CPUs before those CPUs have finished feature detection. As such, the bits are set at the first cr4 write, so that cr4 write bugs can be detected (instead of silently papered over). This uses a few bytes less storage of a location we don't have: read-only per-CPU data. A check is performed after the register write because an attack could just skip directly to the register write. Such a direct jump is possible because of how this function may be built by the compiler (especially due to the removal of frame pointers) where it doesn't add a stack frame (function exit may only be a retq without pops) which is sufficient for trivial exploitation like in the timer overwrites mentioned above). The asm argument constraints gain the "+" modifier to convince the compiler that it shouldn't make ordering assumptions about the arguments or memory, and treat them as changed. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Dave Hansen Cc: kernel-hardening@lists.openwall.com Link: https://lkml.kernel.org/r/20190618045503.39105-3-keescook@chromium.org Signed-off-by: Lin Yujun Reviewed-by: Zhang Jianhua Signed-off-by: Yongqiang Liu --- arch/x86/include/asm/special_insns.h | 22 +++++++++++++++++++++- arch/x86/kernel/cpu/common.c | 20 ++++++++++++++++++++ arch/x86/kernel/smpboot.c | 8 +++++++- 3 files changed, 48 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index b684dfdaaec1..ea8158e8fa4b 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -6,6 +6,8 @@ #ifdef __KERNEL__ #include +#include +#include /* * Volatile isn't enough to prevent the compiler from reordering the @@ -16,6 +18,10 @@ */ extern unsigned long __force_order; +/* Starts false and gets enabled once CPU feature detection is done. */ +DECLARE_STATIC_KEY_FALSE(cr_pinning); +extern unsigned long cr4_pinned_bits; + static inline unsigned long native_read_cr0(void) { unsigned long val; @@ -74,7 +80,21 @@ static inline unsigned long native_read_cr4(void) static inline void native_write_cr4(unsigned long val) { - asm volatile("mov %0,%%cr4": : "r" (val), "m" (__force_order)); + unsigned long bits_missing = 0; + +set_register: + asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) { + bits_missing = ~val & cr4_pinned_bits; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. 
*/ + WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n", + bits_missing); + } } #ifdef CONFIG_X86_64 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 6fe28a1b1867..a7d2fff95755 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -365,6 +365,25 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) cr4_clear_bits(X86_CR4_UMIP); } +DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); +EXPORT_SYMBOL(cr_pinning); +unsigned long cr4_pinned_bits __ro_after_init; +EXPORT_SYMBOL(cr4_pinned_bits); + +/* + * Once CPU feature detection is finished (and boot params have been + * parsed), record any of the sensitive CR bits that are set, and + * enable CR pinning. + */ +static void __init setup_cr_pinning(void) +{ + unsigned long mask; + + mask = (X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP); + cr4_pinned_bits = this_cpu_read(cpu_tlbstate.cr4) & mask; + static_key_enable(&cr_pinning.key); +} + /* * Protection Keys are not available in 32-bit mode. */ @@ -1588,6 +1607,7 @@ void __init identify_boot_cpu(void) #endif cpu_detect_tlb(&boot_cpu_data); tsx_init(); + setup_cr_pinning(); } void identify_secondary_cpu(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 08b5f534fabb..23021970136e 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -212,13 +212,19 @@ static int enable_start_cpu0; */ static void notrace start_secondary(void *unused) { + unsigned long cr4 = __read_cr4(); + /* * Don't put *anything* except direct CPU state initialization * before cpu_init(), SMP booting is too fragile that we want to * limit the things done here to the most necessary things. */ if (boot_cpu_has(X86_FEATURE_PCID)) - __write_cr4(__read_cr4() | X86_CR4_PCIDE); + cr4 |= X86_CR4_PCIDE; + if (static_branch_likely(&cr_pinning)) + cr4 |= cr4_pinned_bits; + + __write_cr4(cr4); #ifdef CONFIG_X86_32 /* switch away from the initial page table */ -- Gitee From 1143d1dd32d8f010bcd678a5f945b7e21f81d373 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 6 May 2022 12:14:43 +0000 Subject: [PATCH 289/340] x86/asm: Pin sensitive CR0 bits mainline inclusion from mainline-v5.3-rc1 commit 8dbec27a242cd3e2816eeb98d3237b9f57cf6232 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I563FS CVE: NA -------------------------------- With sensitive CR4 bits pinned now, it's possible that the WP bit for CR0 might become a target as well. Following the same reasoning for the CR4 pinning, pin CR0's WP bit. Contrary to the cpu feature dependend CR4 pinning this can be done with a constant value. 
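Both the CR4 hunk above and the CR0 hunk below follow the same pin-on-write pattern: write the register, and if a pinned bit came back cleared, set it again before warning. A condensed sketch of that pattern (register access and symbol names are simplified stand-ins, not the exact kernel code):

	static unsigned long pinned_bits;	/* established once during boot */

	void pinned_write(unsigned long val)
	{
		unsigned long bits_missing = 0;

	set_register:
		write_reg(val);			/* the actual mov to %cr0 / %cr4 */

		if (static_branch_likely(&cr_pinning)) {
			if (unlikely((val & pinned_bits) != pinned_bits)) {
				bits_missing = pinned_bits & ~val;
				val |= bits_missing;
				goto set_register;	/* restore the bits first */
			}
			/* Warn only after the missing bits have been set again. */
			WARN_ONCE(bits_missing, "pinned bits went missing: %lx!?\n",
				  bits_missing);
		}
	}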
Suggested-by: Peter Zijlstra Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: Linus Torvalds Cc: Dave Hansen Cc: kernel-hardening@lists.openwall.com Link: https://lkml.kernel.org/r/20190618045503.39105-4-keescook@chromium.org Signed-off-by: Lin Yujun Reviewed-by: Zhang Jianhua Signed-off-by: Yongqiang Liu --- arch/x86/include/asm/special_insns.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index ea8158e8fa4b..87c371e87743 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -31,7 +31,20 @@ static inline unsigned long native_read_cr0(void) static inline void native_write_cr0(unsigned long val) { - asm volatile("mov %0,%%cr0": : "r" (val), "m" (__force_order)); + unsigned long bits_missing = 0; + +set_register: + asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order)); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { + bits_missing = X86_CR0_WP; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); + } } static inline unsigned long native_read_cr2(void) -- Gitee From f1434c1db5ec10f3c931dcde9d71779e3f78f1dc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 6 May 2022 12:14:44 +0000 Subject: [PATCH 290/340] x86/asm: Move native_write_cr0/4() out of line mainline inclusion from mainline-v5.3-rc1 commit 7652ac92018536eb807b6c2130100c85f1ba7e3b category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I563FS CVE: NA -------------------------------- The pinning of sensitive CR0 and CR4 bits caused a boot crash when loading the kvm_intel module on a kernel compiled with CONFIG_PARAVIRT=n. The reason is that the static key which controls the pinning is marked RO after init. The kvm_intel module contains a CR4 write which requires to update the static key entry list. That obviously does not work when the key is in a RO section. With CONFIG_PARAVIRT enabled this does not happen because the CR4 write uses the paravirt indirection and the actual write function is built in. As the key is intended to be immutable after init, move native_write_cr0/4() out of line. While at it consolidate the update of the cr4 shadow variable and store the value right away when the pinning is initialized on a booting CPU. No point in reading it back 20 instructions later. This allows to confine the static key and the pinning variable to cpu/common and allows to mark them static. 
Fixes: 8dbec27a242c ("x86/asm: Pin sensitive CR0 bits") Fixes: 873d50d58f67 ("x86/asm: Pin sensitive CR4 bits") Reported-by: Linus Torvalds Reported-by: Xi Ruoyao Signed-off-by: Thomas Gleixner Tested-by: Xi Ruoyao Acked-by: Kees Cook Acked-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1907102140340.1758@nanos.tec.linutronix.de Signed-off-by: Lin Yujun Reviewed-by: Zhang Jianhua Signed-off-by: Yongqiang Liu --- arch/x86/include/asm/processor.h | 1 + arch/x86/include/asm/special_insns.h | 41 +--------------- arch/x86/kernel/cpu/common.c | 72 +++++++++++++++++++++------- arch/x86/kernel/smpboot.c | 14 +----- arch/x86/xen/smp_pv.c | 1 + 5 files changed, 61 insertions(+), 68 deletions(-) diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index e119c78581d1..307aa9454df3 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -763,6 +763,7 @@ extern void load_direct_gdt(int); extern void load_fixmap_gdt(int); extern void load_percpu_segment(int); extern void cpu_init(void); +extern void cr4_init(void); static inline unsigned long get_debugctlmsr(void) { diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index 87c371e87743..af5f0d9a9951 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -18,9 +18,7 @@ */ extern unsigned long __force_order; -/* Starts false and gets enabled once CPU feature detection is done. */ -DECLARE_STATIC_KEY_FALSE(cr_pinning); -extern unsigned long cr4_pinned_bits; +void native_write_cr0(unsigned long val); static inline unsigned long native_read_cr0(void) { @@ -29,24 +27,6 @@ static inline unsigned long native_read_cr0(void) return val; } -static inline void native_write_cr0(unsigned long val) -{ - unsigned long bits_missing = 0; - -set_register: - asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order)); - - if (static_branch_likely(&cr_pinning)) { - if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { - bits_missing = X86_CR0_WP; - val |= bits_missing; - goto set_register; - } - /* Warn after we've set the missing bits. */ - WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); - } -} - static inline unsigned long native_read_cr2(void) { unsigned long val; @@ -91,24 +71,7 @@ static inline unsigned long native_read_cr4(void) return val; } -static inline void native_write_cr4(unsigned long val) -{ - unsigned long bits_missing = 0; - -set_register: - asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); - - if (static_branch_likely(&cr_pinning)) { - if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) { - bits_missing = ~val & cr4_pinned_bits; - val |= bits_missing; - goto set_register; - } - /* Warn after we've set the missing bits. 
*/ - WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n", - bits_missing); - } -} +void native_write_cr4(unsigned long val); #ifdef CONFIG_X86_64 static inline unsigned long native_read_cr8(void) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index a7d2fff95755..a0306327e7ed 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -365,10 +365,62 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) cr4_clear_bits(X86_CR4_UMIP); } -DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); -EXPORT_SYMBOL(cr_pinning); -unsigned long cr4_pinned_bits __ro_after_init; -EXPORT_SYMBOL(cr4_pinned_bits); +static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); +static unsigned long cr4_pinned_bits __ro_after_init; + +void native_write_cr0(unsigned long val) +{ + unsigned long bits_missing = 0; + +set_register: + asm volatile("mov %0,%%cr0": "+r" (val), "+m" (__force_order)); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & X86_CR0_WP) != X86_CR0_WP)) { + bits_missing = X86_CR0_WP; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR0 WP bit went missing!?\n"); + } +} +EXPORT_SYMBOL(native_write_cr0); + +void native_write_cr4(unsigned long val) +{ + unsigned long bits_missing = 0; + +set_register: + asm volatile("mov %0,%%cr4": "+r" (val), "+m" (cr4_pinned_bits)); + + if (static_branch_likely(&cr_pinning)) { + if (unlikely((val & cr4_pinned_bits) != cr4_pinned_bits)) { + bits_missing = ~val & cr4_pinned_bits; + val |= bits_missing; + goto set_register; + } + /* Warn after we've set the missing bits. */ + WARN_ONCE(bits_missing, "CR4 bits went missing: %lx!?\n", + bits_missing); + } +} +EXPORT_SYMBOL(native_write_cr4); + +void cr4_init(void) +{ + unsigned long cr4 = __read_cr4(); + + if (boot_cpu_has(X86_FEATURE_PCID)) + cr4 |= X86_CR4_PCIDE; + if (static_branch_likely(&cr_pinning)) + cr4 |= cr4_pinned_bits; + + __write_cr4(cr4); + + /* Initialize cr4 shadow for this CPU. */ + this_cpu_write(cpu_tlbstate.cr4, cr4); +} /* * Once CPU feature detection is finished (and boot params have been @@ -1851,12 +1903,6 @@ void cpu_init(void) wait_for_master_cpu(cpu); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. - */ - cr4_init_shadow(); - if (cpu) load_ucode_ap(); @@ -1956,12 +2002,6 @@ void cpu_init(void) wait_for_master_cpu(cpu); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. - */ - cr4_init_shadow(); - show_ucode_info_early(); pr_info("Initializing CPU#%d\n", cpu); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 23021970136e..ee697fa8847d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -212,28 +212,16 @@ static int enable_start_cpu0; */ static void notrace start_secondary(void *unused) { - unsigned long cr4 = __read_cr4(); - /* * Don't put *anything* except direct CPU state initialization * before cpu_init(), SMP booting is too fragile that we want to * limit the things done here to the most necessary things. */ - if (boot_cpu_has(X86_FEATURE_PCID)) - cr4 |= X86_CR4_PCIDE; - if (static_branch_likely(&cr_pinning)) - cr4 |= cr4_pinned_bits; - - __write_cr4(cr4); + cr4_init(); #ifdef CONFIG_X86_32 /* switch away from the initial page table */ load_cr3(swapper_pg_dir); - /* - * Initialize the CR4 shadow before doing anything that could - * try to read it. 
- */ - cr4_init_shadow(); __flush_tlb_all(); #endif load_current_idt(); diff --git a/arch/x86/xen/smp_pv.c b/arch/x86/xen/smp_pv.c index e3b18ad49889..32a9c2212124 100644 --- a/arch/x86/xen/smp_pv.c +++ b/arch/x86/xen/smp_pv.c @@ -57,6 +57,7 @@ static void cpu_bringup(void) { int cpu; + cr4_init(); cpu_init(); touch_softlockup_watchdog(); preempt_disable(); -- Gitee From d96ca30cacc3c08345ecd0c95a9d237c86a9ee77 Mon Sep 17 00:00:00 2001 From: david regan Date: Fri, 6 May 2022 12:14:45 +0000 Subject: [PATCH 291/340] mtd: rawnand: brcmnand: Fixed incorrect sub-page ECC status stable inclusion from stable-v4.19.231 commit c48771b7f7b086bca5a1094191b33c3c4324bf0b category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: N/A ------------------------------------------------ [ Upstream commit 36415a7964711822e63695ea67fede63979054d9 ] The brcmnand driver contains a bug in which if a page (example 2k byte) is read from the parallel/ONFI NAND and within that page a subpage (512 byte) has correctable errors which is followed by a subpage with uncorrectable errors, the page read will return the wrong status of correctable (as opposed to the actual status of uncorrectable.) The bug is in function brcmnand_read_by_pio where there is a check for uncorrectable bits which will be preempted if a previous status for correctable bits is detected. The fix is to stop checking for bad bits only if we already have a bad bits status. Fixes: 27c5b17cd1b1 ("mtd: nand: add NAND driver "library" for Broadcom STB NAND controller") Signed-off-by: david regan Reviewed-by: Florian Fainelli Signed-off-by: Miquel Raynal Link: https://lore.kernel.org/linux-mtd/trinity-478e0c09-9134-40e8-8f8c-31c371225eda-1643237024774@3c-app-mailcom-lxa02 Signed-off-by: Sasha Levin Conflicts: drivers/mtd/nand/raw/brcmnand/brcmnand.c Signed-off-by: Cui GaoSheng Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/mtd/nand/raw/brcmnand/brcmnand.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/mtd/nand/raw/brcmnand/brcmnand.c b/drivers/mtd/nand/raw/brcmnand/brcmnand.c index 4b90d5b380c2..6fbca4ae1113 100644 --- a/drivers/mtd/nand/raw/brcmnand/brcmnand.c +++ b/drivers/mtd/nand/raw/brcmnand/brcmnand.c @@ -1633,7 +1633,7 @@ static int brcmnand_read_by_pio(struct mtd_info *mtd, struct nand_chip *chip, mtd->oobsize / trans, host->hwcfg.sector_size_1k); - if (!ret) { + if (ret != -EBADMSG) { *err_addr = brcmnand_read_reg(ctrl, BRCMNAND_UNCORR_ADDR) | ((u64)(brcmnand_read_reg(ctrl, -- Gitee From 7959a47042a6c1ec5a97966cf3c04ee972b55348 Mon Sep 17 00:00:00 2001 From: Ronnie Sahlberg Date: Sat, 7 May 2022 08:24:21 +0000 Subject: [PATCH 292/340] cifs: fix double free race when mount fails in cifs_get_root() stable inclusion from linux-4.19.233 commit 2fe0e281f7ad0a62259649764228227dd6b2561d category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- [ Upstream commit 3d6cc9898efdfb062efb74dc18cfc700e082f5d5 ] When cifs_get_root() fails during cifs_smb3_do_mount() we call deactivate_locked_super() which eventually will call delayed_free() which will free the context. In this situation we should not proceed to enter the out: section in cifs_smb3_do_mount() and free the same resources a second time. 
[Thu Feb 10 12:59:06 2022] BUG: KASAN: use-after-free in rcu_cblist_dequeue+0x32/0x60 [Thu Feb 10 12:59:06 2022] Read of size 8 at addr ffff888364f4d110 by task swapper/1/0 [Thu Feb 10 12:59:06 2022] CPU: 1 PID: 0 Comm: swapper/1 Tainted: G OE 5.17.0-rc3+ #4 [Thu Feb 10 12:59:06 2022] Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS Hyper-V UEFI Release v4.0 12/17/2019 [Thu Feb 10 12:59:06 2022] Call Trace: [Thu Feb 10 12:59:06 2022] [Thu Feb 10 12:59:06 2022] dump_stack_lvl+0x5d/0x78 [Thu Feb 10 12:59:06 2022] print_address_description.constprop.0+0x24/0x150 [Thu Feb 10 12:59:06 2022] ? rcu_cblist_dequeue+0x32/0x60 [Thu Feb 10 12:59:06 2022] kasan_report.cold+0x7d/0x117 [Thu Feb 10 12:59:06 2022] ? rcu_cblist_dequeue+0x32/0x60 [Thu Feb 10 12:59:06 2022] __asan_load8+0x86/0xa0 [Thu Feb 10 12:59:06 2022] rcu_cblist_dequeue+0x32/0x60 [Thu Feb 10 12:59:06 2022] rcu_core+0x547/0xca0 [Thu Feb 10 12:59:06 2022] ? call_rcu+0x3c0/0x3c0 [Thu Feb 10 12:59:06 2022] ? __this_cpu_preempt_check+0x13/0x20 [Thu Feb 10 12:59:06 2022] ? lock_is_held_type+0xea/0x140 [Thu Feb 10 12:59:06 2022] rcu_core_si+0xe/0x10 [Thu Feb 10 12:59:06 2022] __do_softirq+0x1d4/0x67b [Thu Feb 10 12:59:06 2022] __irq_exit_rcu+0x100/0x150 [Thu Feb 10 12:59:06 2022] irq_exit_rcu+0xe/0x30 [Thu Feb 10 12:59:06 2022] sysvec_hyperv_stimer0+0x9d/0xc0 ... [Thu Feb 10 12:59:07 2022] Freed by task 58179: [Thu Feb 10 12:59:07 2022] kasan_save_stack+0x26/0x50 [Thu Feb 10 12:59:07 2022] kasan_set_track+0x25/0x30 [Thu Feb 10 12:59:07 2022] kasan_set_free_info+0x24/0x40 [Thu Feb 10 12:59:07 2022] ____kasan_slab_free+0x137/0x170 [Thu Feb 10 12:59:07 2022] __kasan_slab_free+0x12/0x20 [Thu Feb 10 12:59:07 2022] slab_free_freelist_hook+0xb3/0x1d0 [Thu Feb 10 12:59:07 2022] kfree+0xcd/0x520 [Thu Feb 10 12:59:07 2022] cifs_smb3_do_mount+0x149/0xbe0 [cifs] [Thu Feb 10 12:59:07 2022] smb3_get_tree+0x1a0/0x2e0 [cifs] [Thu Feb 10 12:59:07 2022] vfs_get_tree+0x52/0x140 [Thu Feb 10 12:59:07 2022] path_mount+0x635/0x10c0 [Thu Feb 10 12:59:07 2022] __x64_sys_mount+0x1bf/0x210 [Thu Feb 10 12:59:07 2022] do_syscall_64+0x5c/0xc0 [Thu Feb 10 12:59:07 2022] entry_SYSCALL_64_after_hwframe+0x44/0xae [Thu Feb 10 12:59:07 2022] Last potentially related work creation: [Thu Feb 10 12:59:07 2022] kasan_save_stack+0x26/0x50 [Thu Feb 10 12:59:07 2022] __kasan_record_aux_stack+0xb6/0xc0 [Thu Feb 10 12:59:07 2022] kasan_record_aux_stack_noalloc+0xb/0x10 [Thu Feb 10 12:59:07 2022] call_rcu+0x76/0x3c0 [Thu Feb 10 12:59:07 2022] cifs_umount+0xce/0xe0 [cifs] [Thu Feb 10 12:59:07 2022] cifs_kill_sb+0xc8/0xe0 [cifs] [Thu Feb 10 12:59:07 2022] deactivate_locked_super+0x5d/0xd0 [Thu Feb 10 12:59:07 2022] cifs_smb3_do_mount+0xab9/0xbe0 [cifs] [Thu Feb 10 12:59:07 2022] smb3_get_tree+0x1a0/0x2e0 [cifs] [Thu Feb 10 12:59:07 2022] vfs_get_tree+0x52/0x140 [Thu Feb 10 12:59:07 2022] path_mount+0x635/0x10c0 [Thu Feb 10 12:59:07 2022] __x64_sys_mount+0x1bf/0x210 [Thu Feb 10 12:59:07 2022] do_syscall_64+0x5c/0xc0 [Thu Feb 10 12:59:07 2022] entry_SYSCALL_64_after_hwframe+0x44/0xae Reported-by: Shyam Prasad N Reviewed-by: Shyam Prasad N Signed-off-by: Ronnie Sahlberg Signed-off-by: Steve French Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu --- fs/cifs/cifsfs.c | 1 + 1 file changed, 1 insertion(+) diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index bc906fcf3f6d..baa1713d6695 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -779,6 +779,7 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, out_super: deactivate_locked_super(sb); + return root; 
out: cifs_cleanup_volume_info(volume_info); return root; -- Gitee From 52dfb991a57677751d6e108539e1bafb67370db9 Mon Sep 17 00:00:00 2001 From: Jiri Bohac Date: Sat, 7 May 2022 08:24:22 +0000 Subject: [PATCH 293/340] xfrm: fix MTU regression stable inclusion from linux-4.19.233 commit 20fe64c54412ca42efb9ba07f856ed8cac6e007e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit 6596a0229541270fb8d38d989f91b78838e5e9da upstream. Commit 749439bfac6e1a2932c582e2699f91d329658196 ("ipv6: fix udpv6 sendmsg crash caused by too small MTU") breaks PMTU for xfrm. A Packet Too Big ICMPv6 message received in response to an ESP packet will prevent all further communication through the tunnel if the reported MTU minus the ESP overhead is smaller than 1280. E.g. in a case of a tunnel-mode ESP with sha256/aes the overhead is 92 bytes. Receiving a PTB with MTU of 1371 or less will result in all further packets in the tunnel dropped. A ping through the tunnel fails with "ping: sendmsg: Invalid argument". Apparently the MTU on the xfrm route is smaller than 1280 and fails the check inside ip6_setup_cork() added by 749439bf. We found this by debugging USGv6/ipv6ready failures. Failing tests are: "Phase-2 Interoperability Test Scenario IPsec" / 5.3.11 and 5.4.11 (Tunnel Mode: Fragmentation). Commit b515d2637276a3810d6595e10ab02c13bfd0b63a ("xfrm: xfrm_state_mtu should return at least 1280 for ipv6") attempted to fix this but caused another regression in TCP MSS calculations and had to be reverted. The patch below fixes the situation by dropping the MTU check and instead checking for the underflows described in the 749439bf commit message. Signed-off-by: Jiri Bohac Fixes: 749439bfac6e ("ipv6: fix udpv6 sendmsg crash caused by too small MTU") Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- net/ipv6/ip6_output.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 251ec12517e9..fd496bda7ec7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1259,8 +1259,6 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (np->frag_size) mtu = np->frag_size; } - if (mtu < IPV6_MIN_MTU) - return -EINVAL; cork->base.fragsize = mtu; cork->base.gso_size = ipc6->gso_size; cork->base.tx_flags = 0; @@ -1320,8 +1318,6 @@ static int __ip6_append_data(struct sock *sk, fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - - sizeof(struct frag_hdr); headersize = sizeof(struct ipv6hdr) + (opt ? 
opt->opt_flen + opt->opt_nflen : 0) + @@ -1329,6 +1325,13 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; + if (mtu < fragheaderlen || + ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr)) + goto emsgsize; + + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); + /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit * the first fragment */ -- Gitee From 2d3caf2452989c9efa16f6ce6dc4b7c48d026d24 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 May 2022 08:24:23 +0000 Subject: [PATCH 294/340] netfilter: fix use-after-free in __nf_register_net_hook() stable inclusion from linux-4.19.233 commit bdd8fc1b826e6f23963f5bef3f7431c6188ec954 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit 56763f12b0f02706576a088e85ef856deacc98a0 upstream. We must not dereference @new_hooks after nf_hook_mutex has been released, because other threads might have freed our allocated hooks already. BUG: KASAN: use-after-free in nf_hook_entries_get_hook_ops include/linux/netfilter.h:130 [inline] BUG: KASAN: use-after-free in hooks_validate net/netfilter/core.c:171 [inline] BUG: KASAN: use-after-free in __nf_register_net_hook+0x77a/0x820 net/netfilter/core.c:438 Read of size 2 at addr ffff88801c1a8000 by task syz-executor237/4430 CPU: 1 PID: 4430 Comm: syz-executor237 Not tainted 5.17.0-rc5-syzkaller-00306-g2293be58d6a1 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0x8d/0x336 mm/kasan/report.c:255 __kasan_report mm/kasan/report.c:442 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:459 nf_hook_entries_get_hook_ops include/linux/netfilter.h:130 [inline] hooks_validate net/netfilter/core.c:171 [inline] __nf_register_net_hook+0x77a/0x820 net/netfilter/core.c:438 nf_register_net_hook+0x114/0x170 net/netfilter/core.c:571 nf_register_net_hooks+0x59/0xc0 net/netfilter/core.c:587 nf_synproxy_ipv6_init+0x85/0xe0 net/netfilter/nf_synproxy_core.c:1218 synproxy_tg6_check+0x30d/0x560 net/ipv6/netfilter/ip6t_SYNPROXY.c:81 xt_check_target+0x26c/0x9e0 net/netfilter/x_tables.c:1038 check_target net/ipv6/netfilter/ip6_tables.c:530 [inline] find_check_entry.constprop.0+0x7f1/0x9e0 net/ipv6/netfilter/ip6_tables.c:573 translate_table+0xc8b/0x1750 net/ipv6/netfilter/ip6_tables.c:735 do_replace net/ipv6/netfilter/ip6_tables.c:1153 [inline] do_ip6t_set_ctl+0x56e/0xb90 net/ipv6/netfilter/ip6_tables.c:1639 nf_setsockopt+0x83/0xe0 net/netfilter/nf_sockopt.c:101 ipv6_setsockopt+0x122/0x180 net/ipv6/ipv6_sockglue.c:1024 rawv6_setsockopt+0xd3/0x6a0 net/ipv6/raw.c:1084 __sys_setsockopt+0x2db/0x610 net/socket.c:2180 __do_sys_setsockopt net/socket.c:2191 [inline] __se_sys_setsockopt net/socket.c:2188 [inline] __x64_sys_setsockopt+0xba/0x150 net/socket.c:2188 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7f65a1ace7d9 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 71 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 b8 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007f65a1a7f308 EFLAGS: 00000246 ORIG_RAX: 0000000000000036 RAX: ffffffffffffffda RBX: 0000000000000006 RCX: 00007f65a1ace7d9 RDX: 
0000000000000040 RSI: 0000000000000029 RDI: 0000000000000003 RBP: 00007f65a1b574c8 R08: 0000000000000001 R09: 0000000000000000 R10: 0000000020000000 R11: 0000000000000246 R12: 00007f65a1b55130 R13: 00007f65a1b574c0 R14: 00007f65a1b24090 R15: 0000000000022000 The buggy address belongs to the page: page:ffffea0000706a00 refcount:0 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1c1a8 flags: 0xfff00000000000(node=0|zone=1|lastcpupid=0x7ff) raw: 00fff00000000000 ffffea0001c1b108 ffffea000046dd08 0000000000000000 raw: 0000000000000000 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected page_owner tracks the page as freed page last allocated via order 2, migratetype Unmovable, gfp_mask 0x52dc0(GFP_KERNEL|__GFP_NOWARN|__GFP_NORETRY|__GFP_COMP|__GFP_ZERO), pid 4430, ts 1061781545818, free_ts 1061791488993 prep_new_page mm/page_alloc.c:2434 [inline] get_page_from_freelist+0xa72/0x2f50 mm/page_alloc.c:4165 __alloc_pages+0x1b2/0x500 mm/page_alloc.c:5389 __alloc_pages_node include/linux/gfp.h:572 [inline] alloc_pages_node include/linux/gfp.h:595 [inline] kmalloc_large_node+0x62/0x130 mm/slub.c:4438 __kmalloc_node+0x35a/0x4a0 mm/slub.c:4454 kmalloc_node include/linux/slab.h:604 [inline] kvmalloc_node+0x97/0x100 mm/util.c:580 kvmalloc include/linux/slab.h:731 [inline] kvzalloc include/linux/slab.h:739 [inline] allocate_hook_entries_size net/netfilter/core.c:61 [inline] nf_hook_entries_grow+0x140/0x780 net/netfilter/core.c:128 __nf_register_net_hook+0x144/0x820 net/netfilter/core.c:429 nf_register_net_hook+0x114/0x170 net/netfilter/core.c:571 nf_register_net_hooks+0x59/0xc0 net/netfilter/core.c:587 nf_synproxy_ipv6_init+0x85/0xe0 net/netfilter/nf_synproxy_core.c:1218 synproxy_tg6_check+0x30d/0x560 net/ipv6/netfilter/ip6t_SYNPROXY.c:81 xt_check_target+0x26c/0x9e0 net/netfilter/x_tables.c:1038 check_target net/ipv6/netfilter/ip6_tables.c:530 [inline] find_check_entry.constprop.0+0x7f1/0x9e0 net/ipv6/netfilter/ip6_tables.c:573 translate_table+0xc8b/0x1750 net/ipv6/netfilter/ip6_tables.c:735 do_replace net/ipv6/netfilter/ip6_tables.c:1153 [inline] do_ip6t_set_ctl+0x56e/0xb90 net/ipv6/netfilter/ip6_tables.c:1639 nf_setsockopt+0x83/0xe0 net/netfilter/nf_sockopt.c:101 page last free stack trace: reset_page_owner include/linux/page_owner.h:24 [inline] free_pages_prepare mm/page_alloc.c:1352 [inline] free_pcp_prepare+0x374/0x870 mm/page_alloc.c:1404 free_unref_page_prepare mm/page_alloc.c:3325 [inline] free_unref_page+0x19/0x690 mm/page_alloc.c:3404 kvfree+0x42/0x50 mm/util.c:613 rcu_do_batch kernel/rcu/tree.c:2527 [inline] rcu_core+0x7b1/0x1820 kernel/rcu/tree.c:2778 __do_softirq+0x29b/0x9c2 kernel/softirq.c:558 Memory state around the buggy address: ffff88801c1a7f00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88801c1a7f80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff88801c1a8000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff88801c1a8080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff88801c1a8100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff Fixes: 2420b79f8c18 ("netfilter: debug: check for sorted array") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- net/netfilter/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 3f0bdc728f89..ada3163b3fa3 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -335,14 +335,15 @@ 
static int __nf_register_net_hook(struct net *net, int pf, p = nf_entry_dereference(*pp); new_hooks = nf_hook_entries_grow(p, reg); - if (!IS_ERR(new_hooks)) + if (!IS_ERR(new_hooks)) { + hooks_validate(new_hooks); rcu_assign_pointer(*pp, new_hooks); + } mutex_unlock(&nf_hook_mutex); if (IS_ERR(new_hooks)) return PTR_ERR(new_hooks); - hooks_validate(new_hooks); #ifdef CONFIG_NETFILTER_INGRESS if (pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) net_inc_ingress_queue(); -- Gitee From 8db3cf08ad706f3c2a5d37ddcaa069b1d65da584 Mon Sep 17 00:00:00 2001 From: Antony Antony Date: Sat, 7 May 2022 08:24:24 +0000 Subject: [PATCH 295/340] xfrm: fix the if_id check in changelink stable inclusion from linux-4.19.233 commit 33c4a43021acfb91e289b2d92e2db0f4a8fcf41e category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit 6d0d95a1c2b07270870e7be16575c513c29af3f1 upstream. if_id will be always 0, because it was not yet initialized. Fixes: 8dce43919566 ("xfrm: interface with if_id 0 should return error") Reported-by: Pavel Machek Signed-off-by: Antony Antony Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- net/xfrm/xfrm_interface.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 6bfd7b8249da..18d27ce26600 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -698,12 +698,12 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], struct net *net = xi->net; struct xfrm_if_parms p = {}; + xfrmi_netlink_parms(data, &p); if (!p.if_id) { NL_SET_ERR_MSG(extack, "if_id must be non zero"); return -EINVAL; } - xfrmi_netlink_parms(data, &p); xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); -- Gitee From 48c4b4b58ff6bea73210f19e6de25b0e75fbf702 Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Sat, 7 May 2022 08:24:25 +0000 Subject: [PATCH 296/340] xfrm: enforce validity of offload input flags stable inclusion from linux-4.19.233 commit 94c115d1380400f0001640619244dfe7a3a04772 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit 7c76ecd9c99b6e9a771d813ab1aa7fa428b3ade1 upstream. struct xfrm_user_offload has flags variable that received user input, but kernel didn't check if valid bits were provided. It caused a situation where not sanitized input was forwarded directly to the drivers. For example, XFRM_OFFLOAD_IPV6 define that was exposed, was used by strongswan, but not implemented in the kernel at all. As a solution, check and sanitize input flags to forward XFRM_OFFLOAD_INBOUND to the drivers. Fixes: d77e38e612a0 ("xfrm: Add an IPsec hardware offloading API") Signed-off-by: Leon Romanovsky Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- include/uapi/linux/xfrm.h | 6 ++++++ net/xfrm/xfrm_device.c | 6 +++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 5f3b9fec7b5f..57d7d5a99c9d 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -504,6 +504,12 @@ struct xfrm_user_offload { int ifindex; __u8 flags; }; +/* This flag was exposed without any kernel code that supporting it. + * Unfortunately, strongswan has the code that uses sets this flag, + * which makes impossible to reuse this bit. 
+ * + * So leave it here to make sure that it won't be reused by mistake. + */ #define XFRM_OFFLOAD_IPV6 1 #define XFRM_OFFLOAD_INBOUND 2 diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index e7a0ce98479f..8a9f02997067 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -153,6 +153,9 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (x->encap || x->tfcpad) return -EINVAL; + if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) + return -EINVAL; + dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { if (!(xuo->flags & XFRM_OFFLOAD_INBOUND)) { @@ -190,7 +193,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->dev = dev; xso->num_exthdrs = 1; - xso->flags = xuo->flags; + /* Don't forward bit that is not implemented */ + xso->flags = xuo->flags & ~XFRM_OFFLOAD_IPV6; err = dev->xfrmdev_ops->xdo_dev_state_add(x); if (err) { -- Gitee From 7078f0dd20c2d448c6af3c2451eefb0b84070e25 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 7 May 2022 08:24:26 +0000 Subject: [PATCH 297/340] netfilter: nf_queue: don't assume sk is full socket stable inclusion from linux-4.19.233 commit 6d9719ef61a99d1ed750b510d32620a6c4bb7397 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit 747670fd9a2d1b7774030dba65ca022ba442ce71 upstream. There is no guarantee that state->sk refers to a full socket. If refcount transitions to 0, sock_put calls sk_free which then ends up with garbage fields. I'd like to thank Oleksandr Natalenko and Jiri Benc for considerable debug work and pointing out state->sk oddities. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Tested-by: Oleksandr Natalenko Signed-off-by: Florian Westphal Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- net/netfilter/nf_queue.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ee6d98355081..ca0c9a9dcdc8 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -46,6 +46,15 @@ void nf_unregister_queue_handler(struct net *net) } EXPORT_SYMBOL(nf_unregister_queue_handler); +static void nf_queue_sock_put(struct sock *sk) +{ +#ifdef CONFIG_INET + sock_gen_put(sk); +#else + sock_put(sk); +#endif +} + void nf_queue_entry_release_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; @@ -56,7 +65,7 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) if (state->out) dev_put(state->out); if (state->sk) - sock_put(state->sk); + nf_queue_sock_put(state->sk); #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) if (entry->skb->nf_bridge) { struct net_device *physdev; -- Gitee From 856bec9c04cc6ad11c5bbcace8ff6fe1bac5412e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sat, 7 May 2022 08:24:27 +0000 Subject: [PATCH 298/340] netfilter: nf_queue: fix possible use-after-free stable inclusion from linux-4.19.233 commit 34dc4a6a7f261736ef7183868a5bddad31c7f9e3 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit c3873070247d9e3c7a6b0cf9bf9b45e8018427b1 upstream. Eric Dumazet says: The sock_hold() side seems suspect, because there is no guarantee that sk_refcnt is not already 0. On failure, we cannot queue the packet and need to indicate an error. The packet will be dropped by the caller. 
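For illustration only, a minimal sketch of the pattern the fix below adopts in nf_queue_entry_get_refs(): the socket reference is taken conditionally, and a false return tells the caller to free the entry and drop the packet instead of queueing it.

        /* Illustrative sketch: keep state->sk only while it still has users */
        if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt))
                return false;   /* caller drops the packet */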
v2: split skb prefetch hunk into separate change Fixes: 271b72c7fa82c ("udp: RCU handling for Unicast packets.") Reported-by: Eric Dumazet Reviewed-by: Eric Dumazet Signed-off-by: Florian Westphal Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- include/net/netfilter/nf_queue.h | 2 +- net/netfilter/nf_queue.c | 11 +++++++++-- net/netfilter/nfnetlink_queue.c | 12 +++++++++--- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index a50a69f5334c..861414a62ce1 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -32,7 +32,7 @@ void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *q void nf_unregister_queue_handler(struct net *net); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); -void nf_queue_entry_get_refs(struct nf_queue_entry *entry); +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry); void nf_queue_entry_release_refs(struct nf_queue_entry *entry); static inline void init_hashrandom(u32 *jhash_initval) diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index ca0c9a9dcdc8..84c59de27882 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -82,10 +82,13 @@ void nf_queue_entry_release_refs(struct nf_queue_entry *entry) EXPORT_SYMBOL_GPL(nf_queue_entry_release_refs); /* Bump dev refs so they don't vanish while packet is out */ -void nf_queue_entry_get_refs(struct nf_queue_entry *entry) +bool nf_queue_entry_get_refs(struct nf_queue_entry *entry) { struct nf_hook_state *state = &entry->state; + if (state->sk && !refcount_inc_not_zero(&state->sk->sk_refcnt)) + return false; + if (state->in) dev_hold(state->in); if (state->out) @@ -104,6 +107,7 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) dev_hold(physdev); } #endif + return true; } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); @@ -195,7 +199,10 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, .size = sizeof(*entry) + route_key_size, }; - nf_queue_entry_get_refs(entry); + if (!nf_queue_entry_get_refs(entry)) { + kfree(entry); + return -ENOTCONN; + } switch (entry->state.pf) { case AF_INET: diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8955431f2ab2..a5aff2834bd6 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -716,9 +716,15 @@ static struct nf_queue_entry * nf_queue_entry_dup(struct nf_queue_entry *e) { struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC); - if (entry) - nf_queue_entry_get_refs(entry); - return entry; + + if (!entry) + return NULL; + + if (nf_queue_entry_get_refs(entry)) + return entry; + + kfree(entry); + return NULL; } #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -- Gitee From e044091614ea835e89ae4e954c2b4538b3e05837 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sat, 7 May 2022 08:24:28 +0000 Subject: [PATCH 299/340] PCI: pciehp: Fix infinite loop in IRQ handler upon power fault stable inclusion from linux-4.19.233 commit ff27f7d0333cff89ec85c419f431aca1b38fb16a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit 23584c1ed3e15a6f4bfab8dc5a88d94ab929ee12 upstream. The Power Fault Detected bit in the Slot Status register differs from all other hotplug events in that it is sticky: It can only be cleared after turning off slot power. Per PCIe r5.0, sec. 
6.7.1.8: If a power controller detects a main power fault on the hot-plug slot, it must automatically set its internal main power fault latch [...]. The main power fault latch is cleared when software turns off power to the hot-plug slot. The stickiness used to cause interrupt storms and infinite loops which were fixed in 2009 by commits 5651c48cfafe ("PCI pciehp: fix power fault interrupt storm problem") and 99f0169c17f3 ("PCI: pciehp: enable software notification on empty slots"). Unfortunately in 2020 the infinite loop issue was inadvertently reintroduced by commit 8edf5332c393 ("PCI: pciehp: Fix MSI interrupt race"): The hardirq handler pciehp_isr() clears the PFD bit until pciehp's power_fault_detected flag is set. That happens in the IRQ thread pciehp_ist(), which never learns of the event because the hardirq handler is stuck in an infinite loop. Fix by setting the power_fault_detected flag already in the hardirq handler. Link: https://bugzilla.kernel.org/show_bug.cgi?id=214989 Link: https://lore.kernel.org/linux-pci/DM8PR11MB5702255A6A92F735D90A4446868B9@DM8PR11MB5702.namprd11.prod.outlook.com Fixes: 8edf5332c393 ("PCI: pciehp: Fix MSI interrupt race") Link: https://lore.kernel.org/r/66eaeef31d4997ceea357ad93259f290ededecfd.1637187226.git.lukas@wunner.de Reported-by: Joseph Bao Tested-by: Joseph Bao Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Cc: stable@vger.kernel.org # v4.19+ Cc: Stuart Hayes [sudip: adjust context] Signed-off-by: Sudip Mukherjee Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- drivers/pci/hotplug/pciehp_hpc.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index c49d1fc86cc1..ac552a85a9b6 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -609,6 +609,8 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) */ if (ctrl->power_fault_detected) status &= ~PCI_EXP_SLTSTA_PFD; + else if (status & PCI_EXP_SLTSTA_PFD) + ctrl->power_fault_detected = true; events |= status; if (!events) { @@ -618,7 +620,7 @@ static irqreturn_t pciehp_isr(int irq, void *dev_id) } if (status) { - pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, events); + pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, status); /* * In MSI mode, all event bits must be zero before the port @@ -706,8 +708,7 @@ static irqreturn_t pciehp_ist(int irq, void *dev_id) } /* Check Power Fault Detected */ - if ((events & PCI_EXP_SLTSTA_PFD) && !ctrl->power_fault_detected) { - ctrl->power_fault_detected = 1; + if (events & PCI_EXP_SLTSTA_PFD) { ctrl_err(ctrl, "Slot(%s): Power fault\n", slot_name(slot)); pciehp_set_attention_status(slot, 1); pciehp_green_led_off(slot); -- Gitee From 3322f20f332cef0d4d5d3478047718e77e2b8c08 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 7 May 2022 08:24:29 +0000 Subject: [PATCH 300/340] memfd: fix F_SEAL_WRITE after shmem huge page allocated stable inclusion from linux-4.19.233 commit 463c2accd61fe8c664dfcb9d9de50f632363ad28 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit f2b277c4d1c63a85127e8aa2588e9cc3bd21cb99 upstream. 
Wangyong reports: after enabling tmpfs filesystem to support transparent hugepage with the following command: echo always > /sys/kernel/mm/transparent_hugepage/shmem_enabled the docker program tries to add F_SEAL_WRITE through the following command, but it fails unexpectedly with errno EBUSY: fcntl(5, F_ADD_SEALS, F_SEAL_WRITE) = -1. That is because memfd_tag_pins() and memfd_wait_for_pins() were never updated for shmem huge pages: checking page_mapcount() against page_count() is hopeless on THP subpages - they need to check total_mapcount() against page_count() on THP heads only. Make memfd_tag_pins() (compared > 1) as strict as memfd_wait_for_pins() (compared != 1): either can be justified, but given the non-atomic total_mapcount() calculation, it is better now to be strict. Bear in mind that total_mapcount() itself scans all of the THP subpages, when choosing to take an XA_CHECK_SCHED latency break. Also fix the unlikely xa_is_value() case in memfd_wait_for_pins(): if a page has been swapped out since memfd_tag_pins(), then its refcount must have fallen, and so it can safely be untagged. Link: https://lkml.kernel.org/r/a4f79248-df75-2c8c-3df-ba3317ccb5da@google.com Signed-off-by: Hugh Dickins Reported-by: Zeal Robot Reported-by: wangyong Cc: Mike Kravetz Cc: Matthew Wilcox (Oracle) Cc: CGEL ZTE Cc: Kirill A. Shutemov Cc: Song Liu Cc: Yang Yang Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- mm/memfd.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/mm/memfd.c b/mm/memfd.c index 9e68a4320a0e..2d19288a093e 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -34,26 +34,35 @@ static void memfd_tag_pins(struct address_space *mapping) void __rcu **slot; pgoff_t start; struct page *page; - unsigned int tagged = 0; + int latency = 0; + int cache_count; lru_add_drain(); start = 0; xa_lock_irq(&mapping->i_pages); radix_tree_for_each_slot(slot, &mapping->i_pages, &iter, start) { + cache_count = 1; page = radix_tree_deref_slot_protected(slot, &mapping->i_pages.xa_lock); - if (!page || radix_tree_exception(page)) { + if (!page || radix_tree_exception(page) || PageTail(page)) { if (radix_tree_deref_retry(page)) { slot = radix_tree_iter_retry(&iter); continue; } - } else if (page_count(page) - page_mapcount(page) > 1) { - radix_tree_tag_set(&mapping->i_pages, iter.index, - MEMFD_TAG_PINNED); + } else { + if (PageTransHuge(page) && !PageHuge(page)) + cache_count = HPAGE_PMD_NR; + if (cache_count != + page_count(page) - total_mapcount(page)) { + radix_tree_tag_set(&mapping->i_pages, + iter.index, MEMFD_TAG_PINNED); + } } - if (++tagged % 1024) + latency += cache_count; + if (latency < 1024) continue; + latency = 0; slot = radix_tree_iter_resume(slot, &iter); xa_unlock_irq(&mapping->i_pages); @@ -79,6 +88,7 @@ static int memfd_wait_for_pins(struct address_space *mapping) pgoff_t start; struct page *page; int error, scan; + int cache_count; memfd_tag_pins(mapping); @@ -107,8 +117,12 @@ static int memfd_wait_for_pins(struct address_space *mapping) page = NULL; } - if (page && - page_count(page) - page_mapcount(page) != 1) { + cache_count = 1; + if (page && PageTransHuge(page) && !PageHuge(page)) + cache_count = HPAGE_PMD_NR; + + if (page && cache_count != + page_count(page) - total_mapcount(page)) { if (scan < LAST_SCAN) goto continue_resched; -- Gitee From 7be896bfa0f4688239f4f85cf9913cfda1b537bc Mon Sep 17 00:00:00 2001 From: suresh kumar Date: Sat, 7 May 2022 08:24:30 +0000 
Subject: [PATCH 301/340] net-sysfs: add check for netdevice being present to speed_show stable inclusion from linux-4.19.235 commit 75fc8363227a999e8f3d17e2eb28dce5600dcd3f category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- [ Upstream commit 4224cfd7fb6523f7a9d1c8bb91bb5df1e38eb624 ] When bringing down the netdevice or system shutdown, a panic can be triggered while accessing the sysfs path because the device is already removed. [ 755.549084] mlx5_core 0000:12:00.1: Shutdown was called [ 756.404455] mlx5_core 0000:12:00.0: Shutdown was called ... [ 757.937260] BUG: unable to handle kernel NULL pointer dereference at (null) [ 758.031397] IP: [] dma_pool_alloc+0x1ab/0x280 crash> bt ... PID: 12649 TASK: ffff8924108f2100 CPU: 1 COMMAND: "amsd" ... #9 [ffff89240e1a38b0] page_fault at ffffffff8f38c778 [exception RIP: dma_pool_alloc+0x1ab] RIP: ffffffff8ee11acb RSP: ffff89240e1a3968 RFLAGS: 00010046 RAX: 0000000000000246 RBX: ffff89243d874100 RCX: 0000000000001000 RDX: 0000000000000000 RSI: 0000000000000246 RDI: ffff89243d874090 RBP: ffff89240e1a39c0 R8: 000000000001f080 R9: ffff8905ffc03c00 R10: ffffffffc04680d4 R11: ffffffff8edde9fd R12: 00000000000080d0 R13: ffff89243d874090 R14: ffff89243d874080 R15: 0000000000000000 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #10 [ffff89240e1a39c8] mlx5_alloc_cmd_msg at ffffffffc04680f3 [mlx5_core] #11 [ffff89240e1a3a18] cmd_exec at ffffffffc046ad62 [mlx5_core] #12 [ffff89240e1a3ab8] mlx5_cmd_exec at ffffffffc046b4fb [mlx5_core] #13 [ffff89240e1a3ae8] mlx5_core_access_reg at ffffffffc0475434 [mlx5_core] #14 [ffff89240e1a3b40] mlx5e_get_fec_caps at ffffffffc04a7348 [mlx5_core] #15 [ffff89240e1a3bb0] get_fec_supported_advertised at ffffffffc04992bf [mlx5_core] #16 [ffff89240e1a3c08] mlx5e_get_link_ksettings at ffffffffc049ab36 [mlx5_core] #17 [ffff89240e1a3ce8] __ethtool_get_link_ksettings at ffffffff8f25db46 #18 [ffff89240e1a3d48] speed_show at ffffffff8f277208 #19 [ffff89240e1a3dd8] dev_attr_show at ffffffff8f0b70e3 #20 [ffff89240e1a3df8] sysfs_kf_seq_show at ffffffff8eedbedf #21 [ffff89240e1a3e18] kernfs_seq_show at ffffffff8eeda596 #22 [ffff89240e1a3e28] seq_read at ffffffff8ee76d10 #23 [ffff89240e1a3e98] kernfs_fop_read at ffffffff8eedaef5 #24 [ffff89240e1a3ed8] vfs_read at ffffffff8ee4e3ff #25 [ffff89240e1a3f08] sys_read at ffffffff8ee4f27f #26 [ffff89240e1a3f50] system_call_fastpath at ffffffff8f395f92 crash> net_device.state ffff89443b0c0000 state = 0x5 (__LINK_STATE_START| __LINK_STATE_NOCARRIER) To prevent this scenario, we also make sure that the netdevice is present. Signed-off-by: suresh kumar Signed-off-by: David S. 
Miller Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu --- net/core/net-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e5dc04cb5599..7a11b2d90975 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -203,7 +203,7 @@ static ssize_t speed_show(struct device *dev, if (!rtnl_trylock()) return restart_syscall(); - if (netif_running(netdev)) { + if (netif_running(netdev) && netif_device_present(netdev)) { struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) -- Gitee From fb435f3bc51428a9e78694770ba3c9818d8ba3a3 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Sat, 7 May 2022 08:24:31 +0000 Subject: [PATCH 302/340] ext4: add check to prevent attempting to resize an fs with sparse_super2 stable inclusion from linux-4.19.235 commit 056d829499d3e9b5bb2c4e9a1d96d4e4f77b5ae2 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit b1489186cc8391e0c1e342f9fbc3eedf6b944c61 upstream. The in-kernel ext4 resize code doesn't support filesystem with the sparse_super2 feature. It fails with errors like this and doesn't finish the resize: EXT4-fs (loop0): resizing filesystem from 16640 to 7864320 blocks EXT4-fs warning (device loop0): verify_reserved_gdb:760: reserved GDT 2 missing grp 1 (32770) EXT4-fs warning (device loop0): ext4_resize_fs:2111: error (-22) occurred during file system resize EXT4-fs (loop0): resized filesystem to 2097152 To reproduce: mkfs.ext4 -b 4096 -I 256 -J size=32 -E resize=$((256*1024*1024)) -O sparse_super2 ext4.img 65M truncate -s 30G ext4.img mount ext4.img /mnt python3 -c 'import fcntl, os, struct ; fd = os.open("/mnt", os.O_RDONLY | os.O_DIRECTORY) ; fcntl.ioctl(fd, 0x40086610, struct.pack("Q", 30 * 1024 * 1024 * 1024 // 4096), False) ; os.close(fd)' dmesg | tail e2fsck ext4.img The userspace resize2fs tool has a check for this case: it checks if the filesystem has sparse_super2 set and if the kernel provides /sys/fs/ext4/features/sparse_super2. However, the former check requires manually reading and parsing the filesystem superblock. Detect this case in ext4_resize_begin and error out early with a clear error message. Signed-off-by: Josh Triplett Link: https://lore.kernel.org/r/74b8ae78405270211943cd7393e65586c5faeed1.1623093259.git.josh@joshtriplett.org Signed-off-by: Theodore Ts'o Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- fs/ext4/resize.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 08d67f79aed7..7441e28d2706 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -74,6 +74,11 @@ int ext4_resize_begin(struct super_block *sb) return -EPERM; } + if (ext4_has_feature_sparse_super2(sb)) { + ext4_msg(sb, KERN_ERR, "Online resizing not supported with sparse_super2"); + return -EOPNOTSUPP; + } + if (test_and_set_bit_lock(EXT4_FLAGS_RESIZING, &EXT4_SB(sb)->s_ext4_flags)) ret = -EBUSY; -- Gitee From 8fca9e9d82611f9d1d85d63a8685038b79d1a1a8 Mon Sep 17 00:00:00 2001 From: Kai Lueke Date: Sat, 7 May 2022 08:24:32 +0000 Subject: [PATCH 303/340] Revert "xfrm: state and policy should fail if XFRMA_IF_ID 0" stable inclusion from linux-4.19.236 commit c8c9220cc0fb0dcdcce140533cc46128bd836347 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit a3d9001b4e287fc043e5539d03d71a32ab114bcb upstream. 
This reverts commit 68ac0f3810e76a853b5f7b90601a05c3048b8b54 because ID 0 was meant to be used for configuring the policy/state without matching for a specific interface (e.g., Cilium is affected, see https://github.com/cilium/cilium/pull/18789 and https://github.com/cilium/cilium/pull/19019). Signed-off-by: Kai Lueke Signed-off-by: Steffen Klassert Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- net/xfrm/xfrm_user.c | 21 +++------------------ 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8d8f9e778cd4..87932f6ad9d7 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -620,13 +620,8 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, xfrm_smark_init(attrs, &x->props.smark); - if (attrs[XFRMA_IF_ID]) { + if (attrs[XFRMA_IF_ID]) x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); - if (!x->if_id) { - err = -EINVAL; - goto error; - } - } err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]); if (err) @@ -1332,13 +1327,8 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, mark = xfrm_mark_get(attrs, &m); - if (attrs[XFRMA_IF_ID]) { + if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); - if (!if_id) { - err = -EINVAL; - goto out_noput; - } - } if (p->info.seq) { x = xfrm_find_acq_byseq(net, mark, p->info.seq); @@ -1640,13 +1630,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us xfrm_mark_get(attrs, &xp->mark); - if (attrs[XFRMA_IF_ID]) { + if (attrs[XFRMA_IF_ID]) xp->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); - if (!xp->if_id) { - err = -EINVAL; - goto error; - } - } return xp; error: -- Gitee From 504cf1ffe1b3023fff5554e92b671655d6ed9c85 Mon Sep 17 00:00:00 2001 From: Yan Yan Date: Sat, 7 May 2022 08:24:33 +0000 Subject: [PATCH 304/340] xfrm: Fix xfrm migrate issues when address family changes stable inclusion from linux-4.19.236 commit f82fdf0230c3ea639306f35daaac7fca11a48a7a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- [ Upstream commit e03c3bba351f99ad932e8f06baa9da1afc418e02 ] xfrm_migrate cannot handle address family change of an xfrm_state. The symptons are the xfrm_state will be migrated to a wrong address, and sending as well as receiving packets wil be broken. This commit fixes it by breaking the original xfrm_state_clone method into two steps so as to update the props.family before running xfrm_init_state. As the result, xfrm_state's inner mode, outer mode, type and IP header length in xfrm_state_migrate can be updated with the new address family. 
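A minimal sketch of the reordering described above, for illustration (the exact hunks are in the diff below): the clone's address family is taken from the migrate request before state initialisation runs, so the inner/outer mode, type and header length are derived from the new family.

        xc->props.family = m->new_family;

        if (xfrm_init_state(xc) < 0)
                goto error;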
Tested with additions to Android's kernel unit test suite: https://android-review.googlesource.com/c/kernel/tests/+/1885354 Signed-off-by: Yan Yan Signed-off-by: Steffen Klassert Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu --- net/xfrm/xfrm_state.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 84dea0ad1666..bc7e982a86d1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1443,9 +1443,6 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, memcpy(&x->mark, &orig->mark, sizeof(x->mark)); memcpy(&x->props.smark, &orig->props.smark, sizeof(x->props.smark)); - if (xfrm_init_state(x) < 0) - goto error; - x->props.flags = orig->props.flags; x->props.extra_flags = orig->props.extra_flags; @@ -1524,6 +1521,11 @@ struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x, if (!xc) return NULL; + xc->props.family = m->new_family; + + if (xfrm_init_state(xc) < 0) + goto error; + memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr)); memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr)); -- Gitee From 5d2ba004b3817cb09f9168331752b448dad89af1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 May 2022 08:24:34 +0000 Subject: [PATCH 305/340] tcp: make tcp_read_sock() more robust stable inclusion from linux-4.19.236 commit 9c90c0c62b5b5878f32400338983878a5345b050 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- [ Upstream commit e3d5ea2c011ecb16fb94c56a659364e6b30fac94 ] If recv_actor() returns an incorrect value, tcp_read_sock() might loop forever. Instead, issue a one time warning and make sure to make progress. Signed-off-by: Eric Dumazet Acked-by: John Fastabend Acked-by: Jakub Sitnicki Acked-by: Daniel Borkmann Link: https://lore.kernel.org/r/20220302161723.3910001-2-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu --- net/ipv4/tcp.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 5b410171a20b..6d955ee09d1c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1674,11 +1674,13 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, if (!copied) copied = used; break; - } else if (used <= len) { - seq += used; - copied += used; - offset += used; } + if (WARN_ON_ONCE(used > len)) + used = len; + seq += used; + copied += used; + offset += used; + /* If recv_actor drops the lock (e.g. TCP splice * receive) the skb pointer might be invalid when * getting here: tcp_collapse might have deleted it -- Gitee From 9fcc65d0f0b0e35e34e432f9a43756040523035c Mon Sep 17 00:00:00 2001 From: Zhang Qiao Date: Sat, 7 May 2022 08:24:35 +0000 Subject: [PATCH 306/340] cpuset: Fix unsafe lock order between cpuset lock and cpuslock MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stable inclusion from linux-4.19.236 commit aa44002e7db25f333ddf412fb81e8db6c100841a category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- The backport commit 4eec5fe1c680a ("cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug") looks suspicious since it comes before commit d74b27d63a8b ("cgroup/cpuset: Change cpuset_rwsem and hotplug lock order") v5.4-rc1~176^2~30 when the locking order was: cpuset lock, cpus lock. 
Fix it with the correct locking order and reduce the cpus locking range because only set_cpus_allowed_ptr() needs the protection of cpus lock. Fixes: 4eec5fe1c680a ("cgroup/cpuset: Fix a race between cpuset_attach() and cpu hotplug") Reported-by: Michal Koutný Signed-off-by: Zhang Qiao Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- kernel/cgroup/cpuset.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 27dd33566df8..3544ee391350 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -1530,9 +1530,13 @@ static void cpuset_attach(struct cgroup_taskset *tset) cgroup_taskset_first(tset, &css); cs = css_cs(css); - cpus_read_lock(); mutex_lock(&cpuset_mutex); + /* + * It should hold cpus lock because a cpu offline event can + * cause set_cpus_allowed_ptr() failed. + */ + get_online_cpus(); /* prepare for attach */ if (cs == &top_cpuset) cpumask_copy(cpus_attach, cpu_possible_mask); @@ -1551,6 +1555,7 @@ static void cpuset_attach(struct cgroup_taskset *tset) cpuset_change_task_nodemask(task, &cpuset_attach_nodemask_to); cpuset_update_task_spread_flag(cs, task); } + put_online_cpus(); /* * Change mm for all threadgroup leaders. This is expensive and may @@ -1586,7 +1591,6 @@ static void cpuset_attach(struct cgroup_taskset *tset) wake_up(&cpuset_attach_wq); mutex_unlock(&cpuset_mutex); - cpus_read_unlock(); } /* The various types of files and directories in a cpuset file system */ -- Gitee From 0d845d504d68ed6e60bc353cccedc4facedd56c3 Mon Sep 17 00:00:00 2001 From: liqiong Date: Sat, 7 May 2022 08:24:36 +0000 Subject: [PATCH 307/340] mm: fix dereference a null pointer in migrate[_huge]_page_move_mapping() stable inclusion from linux-4.19.236 commit deebce9df9ffaa62613bfcd8351d0c43a9a66108 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- Upstream doesn't use the radix tree any more in migrate.c, so this patch is not needed there. The two functions look up a slot and dereference the pointer. If the pointer is null, the kernel crashes. The 'numad' service calls 'migrate_pages' periodically. If some slots are being replaced (Cache Eviction), radix_tree_lookup_slot() returns a null pointer that causes a kernel crash. "numad": crash> bt [exception RIP: migrate_page_move_mapping+337] Introduce pointer checking to avoid dereferencing a null pointer.
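Illustrative sketch of the check added at both lookup sites (see the diff below): a slot that has gone away is treated as a transient condition and reported as -EAGAIN rather than dereferenced.

        pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page));
        if (pslot == NULL) {
                xa_unlock_irq(&mapping->i_pages);
                return -EAGAIN;
        }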
Cc: # linux-4.19.y Signed-off-by: liqiong Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- mm/migrate.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mm/migrate.c b/mm/migrate.c index ecfa8829acfd..3a20900e19e0 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -473,6 +473,10 @@ int migrate_page_move_mapping(struct address_space *mapping, pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page)); + if (pslot == NULL) { + xa_unlock_irq(&mapping->i_pages); + return -EAGAIN; + } expected_count += hpage_nr_pages(page) + page_has_private(page); if (page_count(page) != expected_count || @@ -596,6 +600,10 @@ int migrate_huge_page_move_mapping(struct address_space *mapping, xa_lock_irq(&mapping->i_pages); pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page)); + if (pslot == NULL) { + xa_unlock_irq(&mapping->i_pages); + return -EAGAIN; + } expected_count = 2 + page_has_private(page); if (page_count(page) != expected_count || -- Gitee From 4c3790eb3c7e9f0fcc9a3a29d14afaf50bead9d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 7 May 2022 08:24:37 +0000 Subject: [PATCH 308/340] net/packet: fix slab-out-of-bounds access in packet_recvmsg() stable inclusion from linux-4.19.236 commit a33dd1e6693f80d805155b3f69c18c2f642915da category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- [ Upstream commit c700525fcc06b05adfea78039de02628af79e07a ] syzbot found that when an AF_PACKET socket is using PACKET_COPY_THRESH and mmap operations, tpacket_rcv() is queueing skbs with garbage in skb->cb[], triggering a too big copy [1] Presumably, users of af_packet using mmap() already gets correct metadata from the mapped buffer, we can simply make sure to clear 12 bytes that might be copied to user space later. 
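For illustration, the tpacket_rcv() hunk below does exactly this: the area of the copied skb's control buffer that may later be returned as the link-layer address is zeroed before the clone is queued.

        memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0,
               sizeof(PACKET_SKB_CB(copy_skb)->sa.ll));
        skb_set_owner_r(copy_skb, sk);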
BUG: KASAN: stack-out-of-bounds in memcpy include/linux/fortify-string.h:225 [inline] BUG: KASAN: stack-out-of-bounds in packet_recvmsg+0x56c/0x1150 net/packet/af_packet.c:3489 Write of size 165 at addr ffffc9000385fb78 by task syz-executor233/3631 CPU: 0 PID: 3631 Comm: syz-executor233 Not tainted 5.17.0-rc7-syzkaller-02396-g0b3660695e80 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:88 [inline] dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106 print_address_description.constprop.0.cold+0xf/0x336 mm/kasan/report.c:255 __kasan_report mm/kasan/report.c:442 [inline] kasan_report.cold+0x83/0xdf mm/kasan/report.c:459 check_region_inline mm/kasan/generic.c:183 [inline] kasan_check_range+0x13d/0x180 mm/kasan/generic.c:189 memcpy+0x39/0x60 mm/kasan/shadow.c:66 memcpy include/linux/fortify-string.h:225 [inline] packet_recvmsg+0x56c/0x1150 net/packet/af_packet.c:3489 sock_recvmsg_nosec net/socket.c:948 [inline] sock_recvmsg net/socket.c:966 [inline] sock_recvmsg net/socket.c:962 [inline] ____sys_recvmsg+0x2c4/0x600 net/socket.c:2632 ___sys_recvmsg+0x127/0x200 net/socket.c:2674 __sys_recvmsg+0xe2/0x1a0 net/socket.c:2704 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x35/0xb0 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7fdfd5954c29 Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 41 15 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 RSP: 002b:00007ffcf8e71e48 EFLAGS: 00000246 ORIG_RAX: 000000000000002f RAX: ffffffffffffffda RBX: 0000000000000003 RCX: 00007fdfd5954c29 RDX: 0000000000000000 RSI: 0000000020000500 RDI: 0000000000000005 RBP: 0000000000000000 R08: 000000000000000d R09: 000000000000000d R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffcf8e71e60 R13: 00000000000f4240 R14: 000000000000c1ff R15: 00007ffcf8e71e54 addr ffffc9000385fb78 is located in stack of task syz-executor233/3631 at offset 32 in frame: ____sys_recvmsg+0x0/0x600 include/linux/uio.h:246 this frame has 1 object: [32, 160) 'addr' Memory state around the buggy address: ffffc9000385fa80: 00 04 f3 f3 f3 f3 f3 00 00 00 00 00 00 00 00 00 ffffc9000385fb00: 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1 00 >ffffc9000385fb80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 f3 ^ ffffc9000385fc00: f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00 f1 ffffc9000385fc80: f1 f1 f1 00 f2 f2 f2 00 f2 f2 f2 00 00 00 00 00 ================================================================== Fixes: 0fb375fb9b93 ("[AF_PACKET]: Allow for > 8 byte hardware addresses.") Signed-off-by: Eric Dumazet Reported-by: syzbot Link: https://lore.kernel.org/r/20220312232958.3535620-1-eric.dumazet@gmail.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu --- net/packet/af_packet.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index bacb1bbe53e2..ddf0d081b5bc 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2241,8 +2241,11 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, copy_skb = skb_get(skb); skb_head = skb->data; } - if (copy_skb) + if (copy_skb) { + memset(&PACKET_SKB_CB(copy_skb)->sa.ll, 0, + sizeof(PACKET_SKB_CB(copy_skb)->sa.ll)); skb_set_owner_r(copy_skb, sk); + } } snaplen = po->rx_ring.frame_size - macoff; if ((int)snaplen < 0) { @@ -3400,6 +3403,8 @@ static int 
packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, sock_recv_ts_and_drops(msg, sk, skb); if (msg->msg_name) { + const size_t max_len = min(sizeof(skb->cb), + sizeof(struct sockaddr_storage)); int copy_len; /* If the address length field is there to be filled @@ -3422,6 +3427,10 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, msg->msg_namelen = sizeof(struct sockaddr_ll); } } + if (WARN_ON_ONCE(copy_len > max_len)) { + copy_len = max_len; + msg->msg_namelen = copy_len; + } memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa, copy_len); } -- Gitee From a54e6bee81f61e53a3e957d6d8e6d98fdfcecc77 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Sat, 7 May 2022 08:24:38 +0000 Subject: [PATCH 309/340] net: handle ARPHRD_PIMREG in dev_is_mac_header_xmit() stable inclusion from linux-4.19.236 commit 274076251d72ab5b2de99e3a3f0b577cd251fcc2 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- [ Upstream commit 4ee06de7729d795773145692e246a06448b1eb7a ] This kind of interface doesn't have a mac header. This patch fixes bpf_redirect() to a PIM interface. Fixes: 27b29f63058d ("bpf: add bpf_redirect() helper") Signed-off-by: Nicolas Dichtel Link: https://lore.kernel.org/r/20220315092008.31423-1-nicolas.dichtel@6wind.com Signed-off-by: Jakub Kicinski Signed-off-by: Sasha Levin Signed-off-by: Yongqiang Liu --- include/linux/if_arp.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index e44746de95cd..c697a0524273 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -55,6 +55,7 @@ static inline bool dev_is_mac_header_xmit(const struct net_device *dev) case ARPHRD_VOID: case ARPHRD_NONE: case ARPHRD_RAWIP: + case ARPHRD_PIMREG: return false; default: return true; -- Gitee From 9617c028309bc741f7ed98a6ef2408ee8093082a Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Sat, 7 May 2022 08:24:39 +0000 Subject: [PATCH 310/340] net: ipv6: fix skb_over_panic in __ip6_append_data stable inclusion from linux-4.19.237 commit 616e2dffaba372cf20b01172a73013ed28cbcc84 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I5646A CVE: NA -------------------------------- commit 5e34af4142ffe68f01c8a9acae83300f8911e20c upstream. Syzbot found a kernel bug in the ipv6 stack: LINK: https://syzkaller.appspot.com/bug?id=205d6f11d72329ab8d62a610c44c5e7e25415580 The reproducer triggers it by sending a crafted message via a sendmmsg() call, which triggers skb_over_panic, and crashes the kernel: skbuff: skb_over_panic: text:ffffffff84647fb4 len:65575 put:65575 head:ffff888109ff0000 data:ffff888109ff0088 tail:0x100af end:0xfec0 dev: Update the check that prevents an invalid packet with MTU equal to the fragment header size to eat up all the space for payload.
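A worked view of the boundary case, for illustration (variable names as in the hunk below): when mtu == fragheaderlen, the old 'mtu < fragheaderlen' test does not fire even though the headers already consume the whole MTU and no payload space is left; making both comparisons inclusive sends this case down the emsgsize path instead.

        /* mtu == fragheaderlen:
         *   old: mtu <  fragheaderlen  -> false, bogus length accepted
         *   new: mtu <= fragheaderlen  -> true,  goto emsgsize
         */
        if (mtu <= fragheaderlen ||
            ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr))
                goto emsgsize;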
The reproducer can be found here: LINK: https://syzkaller.appspot.com/text?tag=ReproC&x=1648c83fb00000 Reported-by: syzbot+e223cf47ec8ae183f2a0@syzkaller.appspotmail.com Signed-off-by: Tadeusz Struk Acked-by: Willem de Bruijn Link: https://lore.kernel.org/r/20220310232538.1044947-1-tadeusz.struk@linaro.org Signed-off-by: Jakub Kicinski Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu --- net/ipv6/ip6_output.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index fd496bda7ec7..373df391dc93 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1325,8 +1325,8 @@ static int __ip6_append_data(struct sock *sk, sizeof(struct frag_hdr) : 0) + rt->rt6i_nfheader_len; - if (mtu < fragheaderlen || - ((mtu - fragheaderlen) & ~7) + fragheaderlen < sizeof(struct frag_hdr)) + if (mtu <= fragheaderlen || + ((mtu - fragheaderlen) & ~7) + fragheaderlen <= sizeof(struct frag_hdr)) goto emsgsize; maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - -- Gitee From 9f38975317cf08b00f4fffabc07de87f3ef7b8e1 Mon Sep 17 00:00:00 2001 From: Wang Wensheng Date: Sat, 7 May 2022 08:24:40 +0000 Subject: [PATCH 311/340] share_pool: Fix ABBA deadlock hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I54I7W CVE: NA There is a ABBA deadlock in the process of sp_group_add_task and proc_stat_show(). PROCESS A: sp_group_add_task() acquire sp_group_sem write lock ->sp_init_proc_stat() acquire sp_spg_stat_sem write lock PROCESS B: proc_stat_show() acquire sp_spg_stat_sem read lock ->idr_proc_stat_cb() acquire sp_group_sem read lock Here we choose the simplest way that acquires sp_group_sem and sp_stat_sem read lock subsequently in proc_stat_show(), since it just has effect on the process of debug feature. Signed-off-by: Wang Wensheng Reviewed-by: Weilong Chen Signed-off-by: Yongqiang Liu --- mm/share_pool.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index 40147ef85ba5..f33b4b308dcf 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4118,7 +4118,6 @@ static int idr_proc_stat_cb(int id, void *p, void *data) long sp_res, sp_res_nsize, non_sp_res, non_sp_shm; /* to prevent ABBA deadlock, first hold sp_group_sem */ - down_read(&sp_group_sem); mutex_lock(&spg_stat->lock); hash_for_each(spg_stat->hash, i, spg_proc_stat, gnode) { proc_stat = spg_proc_stat->proc_stat; @@ -4147,7 +4146,6 @@ static int idr_proc_stat_cb(int id, void *p, void *data) seq_putc(seq, '\n'); } mutex_unlock(&spg_stat->lock); - up_read(&sp_group_sem); return 0; } @@ -4165,10 +4163,16 @@ static int proc_stat_show(struct seq_file *seq, void *offset) byte2kb(atomic64_read(&kthread_stat.alloc_size)), byte2kb(atomic64_read(&kthread_stat.k2u_size))); - /* pay attention to potential ABBA deadlock */ + /* + * This ugly code is just for fixing the ABBA deadlock against + * sp_group_add_task. 
+ */ + down_read(&sp_group_sem); down_read(&sp_spg_stat_sem); idr_for_each(&sp_spg_stat_idr, idr_proc_stat_cb, seq); up_read(&sp_spg_stat_sem); + up_read(&sp_group_sem); + return 0; } -- Gitee From 1177aad150e5bb5458f80bd92e4be42eb2142fef Mon Sep 17 00:00:00 2001 From: Guo Mengqi Date: Sat, 7 May 2022 08:24:41 +0000 Subject: [PATCH 312/340] sharepool: fix hisi oom deadlock hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I54I59 CVE: NA -------------------------------------------------- When hisi_oom_notifier_call calls spg_overview_show, it requires the global rwsem sp_group_sem, which had been held by another process when oomed. This leads to kernel hungtask. At another position the unecessary sp_group_sem causes an AA deadlock. [ 1934.549016] INFO: task klogd:2757 blocked for more than 120 seconds. [ 1934.562408] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message. [ 1934.570231] klogd D 0 2757 2746 0x00000000 [ 1934.575707] Call trace: [ 1934.578162] __switch_to+0xe8/0x150 [ 1934.581648] __schedule+0x250/0x558 [ 1934.585133] schedule+0x30/0xf0 [ 1934.588267] rwsem_down_read_failed+0x10c/0x188 [ 1934.592788] down_read+0x60/0x68 [ 1934.596015] spg_overview_show.part.31+0xc8/0xf8 [ 1934.600622] spg_overview_show+0x2c/0x38 [ 1934.604543] hisi_oom_notifier_call+0xe8/0x120 [ 1934.608975] out_of_memory+0x7c/0x570 [ 1934.612631] __alloc_pages_nodemask+0xcfc/0xd98 [ 1934.617158] alloc_pages_current+0x88/0xf0 [ 1934.621246] __page_cache_alloc+0x8c/0xd8 [ 1934.625247] page_cache_alloc_inode+0x48/0x58 [ 1934.629595] filemap_fault+0x360/0x8e0 [ 1934.633341] ext4_filemap_fault+0x38/0x128 [ 1934.637431] __do_fault+0x50/0x218 [ 1934.640822] __handle_mm_fault+0x69c/0x9c8 [ 1934.644909] handle_mm_fault+0xf8/0x200 [ 1934.648740] do_page_fault+0x220/0x508 [ 1934.652477] do_translation_fault+0xa8/0xbc [ 1934.656652] do_mem_abort+0x68/0x118 [ 1934.660216] do_el0_ia_bp_hardening+0x6c/0xd8 [ 1934.664565] el0_ia+0x20/0x24 Signed-off-by: Guo Mengqi Reviewed-by: Weilong Chen Signed-off-by: Yongqiang Liu --- mm/share_pool.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mm/share_pool.c b/mm/share_pool.c index f33b4b308dcf..54d137eae3cf 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -4077,9 +4077,9 @@ void spg_overview_show(struct seq_file *seq) atomic_read(&sp_overall_stat.spa_total_num)); } - down_read(&sp_group_sem); + down_read(&sp_spg_stat_sem); idr_for_each(&sp_spg_stat_idr, idr_spg_stat_cb, seq); - up_read(&sp_group_sem); + up_read(&sp_spg_stat_sem); if (seq != NULL) seq_puts(seq, "\n"); -- Gitee From 1811840c2cdebd0820818392a8217ffbd1be5c67 Mon Sep 17 00:00:00 2001 From: Zhang Jian Date: Sat, 7 May 2022 08:24:42 +0000 Subject: [PATCH 313/340] mm/sharepool: Fix sharepool node id invalid when using sp_alloc hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I54IL8 CVE: NA ----------------------------- When passing numa id to sp_alloc, sometimes numa id does not work. This is because memory policy will change numa id to a preferred one if memory policy is set. Fix the error by mbind virtual address to desired numa id. 
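A minimal sketch of the binding step this patch adds (see sp_mbind() in the diff below): build a nodemask holding only the requested node and bind the mapped range to it before the range is populated.

        nodemask_t nmask;

        nodes_clear(nmask);
        node_set(node, nmask);
        ret = __do_mbind(start, len, MPOL_BIND, MPOL_F_STATIC_NODES,
                         &nmask, MPOL_MF_STRICT, mm);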
Signed-off-by: Zhang Jian Signed-off-by: Zhou Guanghui Signed-off-by: Wang Wensheng Reviewed-by: Weilong Chen Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- include/linux/mempolicy.h | 10 ++++++ mm/mempolicy.c | 14 ++++++--- mm/share_pool.c | 65 ++++++++++++++++++++++++--------------- 3 files changed, 61 insertions(+), 28 deletions(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 00437ae80b6f..17ec6bb919d7 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -219,6 +219,9 @@ static inline bool vma_migratable(struct vm_area_struct *vma) extern int mpol_misplaced(struct page *, struct vm_area_struct *, unsigned long); extern void mpol_put_task_policy(struct task_struct *); +extern long __do_mbind(unsigned long start, unsigned long len, + unsigned short mode, unsigned short mode_flags, + nodemask_t *nmask, unsigned long flags, struct mm_struct *mm); #else struct mempolicy {}; @@ -322,5 +325,12 @@ static inline int mpol_misplaced(struct page *page, struct vm_area_struct *vma, static inline void mpol_put_task_policy(struct task_struct *task) { } + +static long __do_mbind(unsigned long start, unsigned long len, + unsigned short mode, unsigned short mode_flags, + nodemask_t *nmask, unsigned long flags, struct mm_struct *mm) +{ + return 0; +} #endif /* CONFIG_NUMA */ #endif diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 76a577cfc277..36961d6d364e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1259,11 +1259,10 @@ static struct page *new_page(struct page *page, unsigned long start) } #endif -static long do_mbind(unsigned long start, unsigned long len, - unsigned short mode, unsigned short mode_flags, - nodemask_t *nmask, unsigned long flags) +long __do_mbind(unsigned long start, unsigned long len, + unsigned short mode, unsigned short mode_flags, + nodemask_t *nmask, unsigned long flags, struct mm_struct *mm) { - struct mm_struct *mm = current->mm; struct mempolicy *new; unsigned long end; int err; @@ -1364,6 +1363,13 @@ static long do_mbind(unsigned long start, unsigned long len, return err; } +static long do_mbind(unsigned long start, unsigned long len, + unsigned short mode, unsigned short mode_flags, + nodemask_t *nmask, unsigned long flags) +{ + return __do_mbind(start, len, mode, mode_flags, nmask, flags, current->mm); +} + /* * User space interface with variable sized bitmaps for nodelists. */ diff --git a/mm/share_pool.c b/mm/share_pool.c index 54d137eae3cf..833ecb7dd859 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -15,7 +15,6 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. 
*/ - #define pr_fmt(fmt) "share pool: " fmt #include @@ -2174,6 +2173,7 @@ struct sp_alloc_context { bool need_fallocate; struct timespec64 start; struct timespec64 end; + bool have_mbind; }; static void trace_sp_alloc_begin(struct sp_alloc_context *ac) @@ -2316,6 +2316,7 @@ static int sp_alloc_prepare(unsigned long size, unsigned long sp_flags, ac->sp_flags = sp_flags; ac->state = ALLOC_NORMAL; ac->need_fallocate = false; + ac->have_mbind = false; return 0; } @@ -2409,7 +2410,7 @@ static void sp_alloc_fallback(struct sp_area *spa, struct sp_alloc_context *ac) } static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, - struct sp_group_node *spg_node, struct sp_alloc_context *ac) + struct sp_alloc_context *ac) { int ret = 0; unsigned long sp_addr = spa->va_start; @@ -2441,25 +2442,19 @@ static int sp_alloc_populate(struct mm_struct *mm, struct sp_area *spa, if (ret) sp_add_work_compact(); } - if (ret) { - if (spa->spg != spg_none) - sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); - else - sp_munmap(mm, spa->va_start, spa->real_size); - - if (unlikely(fatal_signal_pending(current))) - pr_warn_ratelimited("allocation failed, current thread is killed\n"); - else - pr_warn_ratelimited("allocation failed due to mm populate failed" - "(potential no enough memory when -12): %d\n", ret); - sp_fallocate(spa); /* need this, otherwise memleak */ - sp_alloc_fallback(spa, ac); - } else { - ac->need_fallocate = true; - } return ret; } +static long sp_mbind(struct mm_struct *mm, unsigned long start, unsigned long len, unsigned long node) +{ + nodemask_t nmask; + + nodes_clear(nmask); + node_set(node, nmask); + return __do_mbind(start, len, MPOL_BIND, MPOL_F_STATIC_NODES, + &nmask, MPOL_MF_STRICT, mm); +} + static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, struct sp_group_node *spg_node, struct sp_alloc_context *ac) { @@ -2475,7 +2470,34 @@ static int __sp_alloc_mmap_populate(struct mm_struct *mm, struct sp_area *spa, return ret; } - ret = sp_alloc_populate(mm, spa, spg_node, ac); + if (!ac->have_mbind) { + ret = sp_mbind(mm, spa->va_start, spa->real_size, spa->node_id); + if (ret < 0) { + pr_err("cannot bind the memory range to specified node:%d, err:%d\n", + spa->node_id, ret); + goto err; + } + ac->have_mbind = true; + } + + ret = sp_alloc_populate(mm, spa, ac); + if (ret) { +err: + if (spa->spg != spg_none) + sp_alloc_unmap(list_next_entry(spg_node, proc_node)->master->mm, spa, spg_node); + else + sp_munmap(mm, spa->va_start, spa->real_size); + + if (unlikely(fatal_signal_pending(current))) + pr_warn_ratelimited("allocation failed, current thread is killed\n"); + else + pr_warn_ratelimited("allocation failed due to mm populate failed(potential no enough memory when -12): %d\n", + ret); + sp_fallocate(spa); /* need this, otherwise memleak */ + sp_alloc_fallback(spa, ac); + } else + ac->need_fallocate = true; + return ret; } @@ -2497,11 +2519,6 @@ static int sp_alloc_mmap_populate(struct sp_area *spa, if (mmap_ret) { if (ac->state != ALLOC_COREDUMP) return mmap_ret; - if (ac->spg == spg_none) { - sp_alloc_unmap(mm, spa, spg_node); - pr_err("dvpp allocation failed due to coredump"); - return mmap_ret; - } ac->state = ALLOC_NORMAL; continue; } -- Gitee From 42dc8522429a6a8ab62d625b13335da273331719 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 7 May 2022 09:12:20 +0000 Subject: [PATCH 314/340] io_uring: fix false WARN_ONCE mainline inclusion from mainline-v5.13-rc6 commit e6ab8991c5d0b0deae0961dc22c0edd1dee328f5 
category: bugfix bugzilla: 186576, https://gitee.com/openeuler/kernel/issues/I554BJ CVE: NA -------------------------------- io_uring: fix false WARN_ONCE WARNING: CPU: 1 PID: 11749 at fs/io-wq.c:244 io_wqe_wake_worker fs/io-wq.c:244 [inline] WARNING: CPU: 1 PID: 11749 at fs/io-wq.c:244 io_wqe_enqueue+0x7f6/0x910 fs/io-wq.c:751 A WARN_ON_ONCE() in io_wqe_wake_worker() can be triggered by a valid userspace setup. Replace it with pr_warn. Reported-by: syzbot+ea2f1484cffe5109dc10@syzkaller.appspotmail.com Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/f7ede342c3342c4c26668f5168e2993e38bbd99c.1623949695.git.asml.silence@gmail.com Signed-off-by: Jens Axboe Signed-off-by: Guo Xuenan Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/io-wq.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/io-wq.c b/fs/io-wq.c index 9c223dfb4b3f..b6e311e57b47 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -282,7 +282,8 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct) * Most likely an attempt to queue unbounded work on an io_wq that * wasn't setup with any unbounded workers. */ - WARN_ON_ONCE(!acct->max_workers); + if (unlikely(!acct->max_workers)) + pr_warn_once("io-wq is not configured for unbound workers"); rcu_read_lock(); ret = io_wqe_activate_free_worker(wqe); @@ -1050,6 +1051,8 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) if (WARN_ON_ONCE(!data->free_work || !data->do_work)) return ERR_PTR(-EINVAL); + if (WARN_ON_ONCE(!bounded)) + return ERR_PTR(-EINVAL); wq = kzalloc(sizeof(*wq), GFP_KERNEL); if (!wq) -- Gitee From f25fd7563d1dee376593b19cca7c3ff789ddd998 Mon Sep 17 00:00:00 2001 From: Slawomir Mrozowicz Date: Mon, 9 May 2022 03:38:43 +0000 Subject: [PATCH 315/340] ixgbe: add the ability for the PF to disable VF link state mainline inclusion from mainline-v5.18-rc1 commit 366fd1000995d4cf64e1a61a0d78a051550b9841 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I532H9 CVE: CVE-2021-33061 -------------------------------- Add support for ndo_set_vf_link_state the Network Device Option that allows the PF driver to control the virtual link state of the VF devices. Without this change a VF cannot be disabled/enabled by the administrator. In the implementation the auto state takes over PF link state to VF link setting, the enable state is not supported, the disable state shut off the VF link regardless of the PF setting. Signed-off-by: Slawomir Mrozowicz Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Conflicts: drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c Signed-off-by: Ziyang Xuan Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 + drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 11 +- drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h | 2 + .../net/ethernet/intel/ixgbe/ixgbe_sriov.c | 206 ++++++++++++++---- .../net/ethernet/intel/ixgbe/ixgbe_sriov.h | 4 +- 5 files changed, 181 insertions(+), 44 deletions(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 4fc906c6166b..9e7b7c224a31 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -179,6 +179,8 @@ struct vf_data_storage { u16 pf_vlan; /* When set, guest VLAN config not allowed. 
*/ u16 pf_qos; u16 tx_rate; + int link_enable; + int link_state; u8 spoofchk_enabled; bool rss_query_enabled; u8 trusted; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index b52585fbb295..d8f5df230a7d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -5622,6 +5622,9 @@ static void ixgbe_up_complete(struct ixgbe_adapter *adapter) ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); ctrl_ext |= IXGBE_CTRL_EXT_PFRSTD; IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); + + /* update setting rx tx for all active vfs */ + ixgbe_set_all_vfs(adapter); } void ixgbe_reinit_locked(struct ixgbe_adapter *adapter) @@ -6074,11 +6077,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) for (i = 0 ; i < adapter->num_vfs; i++) adapter->vfinfo[i].clear_to_send = false; - /* ping all the active vfs to let them know we are going down */ - ixgbe_ping_all_vfs(adapter); - - /* Disable all VFTE/VFRE TX/RX */ - ixgbe_disable_tx_rx(adapter); + /* update setting rx tx for all active vfs */ + ixgbe_set_all_vfs(adapter); } /* disable transmits in the hardware now that interrupts are off */ @@ -10288,6 +10288,7 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_set_vf_vlan = ixgbe_ndo_set_vf_vlan, .ndo_set_vf_rate = ixgbe_ndo_set_vf_bw, .ndo_set_vf_spoofchk = ixgbe_ndo_set_vf_spoofchk, + .ndo_set_vf_link_state = ixgbe_ndo_set_vf_link_state, .ndo_set_vf_rss_query_en = ixgbe_ndo_set_vf_rss_query_en, .ndo_set_vf_trust = ixgbe_ndo_set_vf_trust, .ndo_get_vf_config = ixgbe_ndo_get_vf_config, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h index e085b6520dac..2246b07beae4 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_mbx.h @@ -80,6 +80,8 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_UPDATE_XCAST_MODE 0x0c +#define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 8aaf856771d7..1f0e84120b32 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -96,6 +96,7 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter, for (i = 0; i < num_vfs; i++) { /* enable spoof checking for all VFs */ adapter->vfinfo[i].spoofchk_enabled = true; + adapter->vfinfo[i].link_enable = true; /* We support VF RSS querying only for 82599 and x540 * devices at the moment. 
These devices share RSS @@ -816,6 +817,57 @@ static inline void ixgbe_write_qde(struct ixgbe_adapter *adapter, u32 vf, } } +/** + * ixgbe_set_vf_rx_tx - Set VF rx tx + * @adapter: Pointer to adapter struct + * @vf: VF identifier + * + * Set or reset correct transmit and receive for vf + **/ +static void ixgbe_set_vf_rx_tx(struct ixgbe_adapter *adapter, int vf) +{ + u32 reg_cur_tx, reg_cur_rx, reg_req_tx, reg_req_rx; + struct ixgbe_hw *hw = &adapter->hw; + u32 reg_offset, vf_shift; + + vf_shift = vf % 32; + reg_offset = vf / 32; + + reg_cur_tx = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_offset)); + reg_cur_rx = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); + + if (adapter->vfinfo[vf].link_enable) { + reg_req_tx = reg_cur_tx | 1 << vf_shift; + reg_req_rx = reg_cur_rx | 1 << vf_shift; + } else { + reg_req_tx = reg_cur_tx & ~(1 << vf_shift); + reg_req_rx = reg_cur_rx & ~(1 << vf_shift); + } + + /* The 82599 cannot support a mix of jumbo and non-jumbo PF/VFs. + * For more info take a look at ixgbe_set_vf_lpe + */ + if (adapter->hw.mac.type == ixgbe_mac_82599EB) { + struct net_device *dev = adapter->netdev; + int pf_max_frame = dev->mtu + ETH_HLEN; + +#if IS_ENABLED(CONFIG_FCOE) + if (dev->features & NETIF_F_FCOE_MTU) + pf_max_frame = max_t(int, pf_max_frame, + IXGBE_FCOE_JUMBO_FRAME_SIZE); +#endif /* CONFIG_FCOE */ + + if (pf_max_frame > ETH_FRAME_LEN) + reg_req_rx = reg_cur_rx & ~(1 << vf_shift); + } + + /* Enable/Disable particular VF */ + if (reg_cur_tx != reg_req_tx) + IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg_req_tx); + if (reg_cur_rx != reg_req_rx) + IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), reg_req_rx); +} + static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) { struct ixgbe_ring_feature *vmdq = &adapter->ring_feature[RING_F_VMDQ]; @@ -841,11 +893,6 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) vf_shift = vf % 32; reg_offset = vf / 32; - /* enable transmit for vf */ - reg = IXGBE_READ_REG(hw, IXGBE_VFTE(reg_offset)); - reg |= BIT(vf_shift); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(reg_offset), reg); - /* force drop enable for all VF Rx queues */ reg = IXGBE_QDE_ENABLE; if (adapter->vfinfo[vf].pf_vlan) @@ -853,27 +900,7 @@ static int ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf) ixgbe_write_qde(adapter, vf, reg); - /* enable receive for vf */ - reg = IXGBE_READ_REG(hw, IXGBE_VFRE(reg_offset)); - reg |= BIT(vf_shift); - /* - * The 82599 cannot support a mix of jumbo and non-jumbo PF/VFs. 
- * For more info take a look at ixgbe_set_vf_lpe - */ - if (adapter->hw.mac.type == ixgbe_mac_82599EB) { - struct net_device *dev = adapter->netdev; - int pf_max_frame = dev->mtu + ETH_HLEN; - -#ifdef CONFIG_FCOE - if (dev->features & NETIF_F_FCOE_MTU) - pf_max_frame = max_t(int, pf_max_frame, - IXGBE_FCOE_JUMBO_FRAME_SIZE); - -#endif /* CONFIG_FCOE */ - if (pf_max_frame > ETH_FRAME_LEN) - reg &= ~BIT(vf_shift); - } - IXGBE_WRITE_REG(hw, IXGBE_VFRE(reg_offset), reg); + ixgbe_set_vf_rx_tx(adapter, vf); /* enable VF mailbox for further messages */ adapter->vfinfo[vf].clear_to_send = true; @@ -1193,6 +1220,25 @@ static int ixgbe_update_vf_xcast_mode(struct ixgbe_adapter *adapter, return 0; } +static int ixgbe_get_vf_link_state(struct ixgbe_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + u32 *link_state = &msgbuf[1]; + + /* verify the PF is supporting the correct API */ + switch (adapter->vfinfo[vf].vf_api) { + case ixgbe_mbox_api_12: + case ixgbe_mbox_api_13: + break; + default: + return -EOPNOTSUPP; + } + + *link_state = adapter->vfinfo[vf].link_enable; + + return 0; +} + static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) { u32 mbx_size = IXGBE_VFMAILBOX_SIZE; @@ -1258,6 +1304,9 @@ static int ixgbe_rcv_msg_from_vf(struct ixgbe_adapter *adapter, u32 vf) case IXGBE_VF_UPDATE_XCAST_MODE: retval = ixgbe_update_vf_xcast_mode(adapter, msgbuf, vf); break; + case IXGBE_VF_GET_LINK_STATE: + retval = ixgbe_get_vf_link_state(adapter, msgbuf, vf); + break; default: e_err(drv, "Unhandled Msg %8.8x\n", msgbuf[0]); retval = IXGBE_ERR_MBX; @@ -1307,18 +1356,6 @@ void ixgbe_msg_task(struct ixgbe_adapter *adapter) } } -void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter) -{ - struct ixgbe_hw *hw = &adapter->hw; - - /* disable transmit and receive for all vfs */ - IXGBE_WRITE_REG(hw, IXGBE_VFTE(0), 0); - IXGBE_WRITE_REG(hw, IXGBE_VFTE(1), 0); - - IXGBE_WRITE_REG(hw, IXGBE_VFRE(0), 0); - IXGBE_WRITE_REG(hw, IXGBE_VFRE(1), 0); -} - static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf) { struct ixgbe_hw *hw = &adapter->hw; @@ -1344,6 +1381,21 @@ void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter) } } +/** + * ixgbe_set_all_vfs - update vfs queues + * @adapter: Pointer to adapter struct + * + * Update setting transmit and receive queues for all vfs + **/ +void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter) +{ + int i; + + for (i = 0 ; i < adapter->num_vfs; i++) + ixgbe_set_vf_link_state(adapter, i, + adapter->vfinfo[i].link_state); +} + int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) { struct ixgbe_adapter *adapter = netdev_priv(netdev); @@ -1641,6 +1693,84 @@ int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting) return 0; } +/** + * ixgbe_set_vf_link_state - Set link state + * @adapter: Pointer to adapter struct + * @vf: VF identifier + * @state: required link state + * + * Set a link force state on/off a single vf + **/ +void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state) +{ + adapter->vfinfo[vf].link_state = state; + + switch (state) { + case IFLA_VF_LINK_STATE_AUTO: + if (test_bit(__IXGBE_DOWN, &adapter->state)) + adapter->vfinfo[vf].link_enable = false; + else + adapter->vfinfo[vf].link_enable = true; + break; + case IFLA_VF_LINK_STATE_ENABLE: + adapter->vfinfo[vf].link_enable = true; + break; + case IFLA_VF_LINK_STATE_DISABLE: + adapter->vfinfo[vf].link_enable = false; + break; + } + + ixgbe_set_vf_rx_tx(adapter, vf); + + /* restart the VF */ + adapter->vfinfo[vf].clear_to_send = false; + 
ixgbe_ping_vf(adapter, vf); +} + +/** + * ixgbe_ndo_set_vf_link_state - Set link state + * @netdev: network interface device structure + * @vf: VF identifier + * @state: required link state + * + * Set the link state of a specified VF, regardless of physical link state + **/ +int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int ret = 0; + + if (vf < 0 || vf >= adapter->num_vfs) { + dev_err(&adapter->pdev->dev, + "NDO set VF link - invalid VF identifier %d\n", vf); + return -EINVAL; + } + + switch (state) { + case IFLA_VF_LINK_STATE_ENABLE: + dev_info(&adapter->pdev->dev, + "NDO set VF %d link state %d - not supported\n", + vf, state); + break; + case IFLA_VF_LINK_STATE_DISABLE: + dev_info(&adapter->pdev->dev, + "NDO set VF %d link state disable\n", vf); + ixgbe_set_vf_link_state(adapter, vf, state); + break; + case IFLA_VF_LINK_STATE_AUTO: + dev_info(&adapter->pdev->dev, + "NDO set VF %d link state auto\n", vf); + ixgbe_set_vf_link_state(adapter, vf, state); + break; + default: + dev_err(&adapter->pdev->dev, + "NDO set VF %d - invalid link state %d\n", vf, state); + ret = -EINVAL; + } + + return ret; +} + int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf, bool setting) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 3ec21923c89c..0690ecb8dfa3 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -17,8 +17,8 @@ void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); #endif void ixgbe_msg_task(struct ixgbe_adapter *adapter); int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); -void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); +void ixgbe_set_all_vfs(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, u8 qos, __be16 vlan_proto); @@ -31,7 +31,9 @@ int ixgbe_ndo_set_vf_rss_query_en(struct net_device *netdev, int vf, int ixgbe_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting); int ixgbe_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); +int ixgbe_ndo_set_vf_link_state(struct net_device *netdev, int vf, int state); void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter); +void ixgbe_set_vf_link_state(struct ixgbe_adapter *adapter, int vf, int state); int ixgbe_disable_sriov(struct ixgbe_adapter *adapter); #ifdef CONFIG_PCI_IOV void ixgbe_enable_sriov(struct ixgbe_adapter *adapter, unsigned int max_vfs); -- Gitee From 2ece2378f8da40d893135e4e3a98daa2b6dc3d58 Mon Sep 17 00:00:00 2001 From: Slawomir Mrozowicz Date: Mon, 9 May 2022 03:38:44 +0000 Subject: [PATCH 316/340] ixgbe: add improvement for MDD response functionality mainline inclusion from mainline-v5.18-rc1 commit 008ca35f6e87be1d60b6af3d1ae247c6d5c2531d category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I532H9 CVE: CVE-2021-33061 -------------------------------- The 82599 PF driver disable VF driver after a special MDD event occurs. Adds the option for administrators to control whether VFs are automatically disabled after several MDD events. The automatically disabling is now the default mode for 82599 PF driver, as it is more reliable. This addresses CVE-2021-33061. 
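At its core the new behaviour is a per-VF abort counter checked against a fixed threshold. The sketch below condenses the ixgbe_bad_vf_abort() handler added in the diff that follows; the real code additionally requires 82599 hardware and the new mdd-disable-vf private flag to be set, and logs the offending VF's MAC address (the sketch's function name is illustrative, not the driver's):

static void bad_vf_abort_sketch(struct ixgbe_adapter *adapter, u32 vf)
{
        /* one more master abort reported in this VF's PCI status register */
        if (++adapter->vfinfo[vf].primary_abort_count <
            IXGBE_PRIMARY_ABORT_LIMIT)
                return;

        /* threshold (5 events) reached: force the VF link down and reset
         * the counter so a re-enabled VF starts counting from zero again */
        ixgbe_set_vf_link_state(adapter, vf, IFLA_VF_LINK_STATE_DISABLE);
        adapter->vfinfo[vf].primary_abort_count = 0;
}

Administrators control the behaviour through the mdd-disable-vf ethtool private flag; re-enabling the flag also clears the per-VF counters, as the ethtool hunk below shows, and the flag defaults to on for 82599, set at probe time.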
Signed-off-by: Slawomir Mrozowicz Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Conflicts: drivers/net/ethernet/intel/ixgbe/ixgbe.h drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c drivers/net/ethernet/intel/ixgbe/ixgbe_main.c Signed-off-by: Ziyang Xuan Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 4 +++ .../net/ethernet/intel/ixgbe/ixgbe_ethtool.c | 21 ++++++++++++++ drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 28 ++++++++++++++++++- 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 9e7b7c224a31..074e23b4534b 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -186,6 +186,7 @@ struct vf_data_storage { u8 trusted; int xcast_mode; unsigned int vf_api; + u8 primary_abort_count; }; enum ixgbevf_xcast_modes { @@ -548,6 +549,8 @@ struct ixgbe_mac_addr { #define IXGBE_TRY_LINK_TIMEOUT (4 * HZ) #define IXGBE_SFP_POLL_JIFFIES (2 * HZ) /* SFP poll every 2 seconds */ +#define IXGBE_PRIMARY_ABORT_LIMIT 5 + /* board specific private data structure */ struct ixgbe_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -607,6 +610,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_EEE_ENABLED BIT(15) #define IXGBE_FLAG2_RX_LEGACY BIT(16) #define IXGBE_FLAG2_IPSEC_ENABLED BIT(17) +#define IXGBE_FLAG2_AUTO_DISABLE_VF BIT(19) /* Tx fast path data */ int num_tx_queues; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 8829bd95d0d3..73e769212c65 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -136,6 +136,8 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { static const char ixgbe_priv_flags_strings[][ETH_GSTRING_LEN] = { #define IXGBE_PRIV_FLAGS_LEGACY_RX BIT(0) "legacy-rx", +#define IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF BIT(2) + "mdd-disable-vf", }; #define IXGBE_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbe_priv_flags_strings) @@ -3410,6 +3412,9 @@ static u32 ixgbe_get_priv_flags(struct net_device *netdev) if (adapter->flags2 & IXGBE_FLAG2_RX_LEGACY) priv_flags |= IXGBE_PRIV_FLAGS_LEGACY_RX; + if (adapter->flags2 & IXGBE_FLAG2_AUTO_DISABLE_VF) + priv_flags |= IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF; + return priv_flags; } @@ -3417,11 +3422,27 @@ static int ixgbe_set_priv_flags(struct net_device *netdev, u32 priv_flags) { struct ixgbe_adapter *adapter = netdev_priv(netdev); unsigned int flags2 = adapter->flags2; + unsigned int i; flags2 &= ~IXGBE_FLAG2_RX_LEGACY; if (priv_flags & IXGBE_PRIV_FLAGS_LEGACY_RX) flags2 |= IXGBE_FLAG2_RX_LEGACY; + flags2 &= ~IXGBE_FLAG2_AUTO_DISABLE_VF; + if (priv_flags & IXGBE_PRIV_FLAGS_AUTO_DISABLE_VF) { + if (adapter->hw.mac.type == ixgbe_mac_82599EB) { + /* Reset primary abort counter */ + for (i = 0; i < adapter->num_vfs; i++) + adapter->vfinfo[i].primary_abort_count = 0; + + flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF; + } else { + e_info(probe, + "Cannot set private flags: Operation not supported\n"); + return -EOPNOTSUPP; + } + } + if (flags2 != adapter->flags2) { adapter->flags2 = flags2; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index d8f5df230a7d..9455c6191694 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7565,6 +7565,27 @@ static void 
ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter) } #ifdef CONFIG_PCI_IOV +static void ixgbe_bad_vf_abort(struct ixgbe_adapter *adapter, u32 vf) +{ + struct ixgbe_hw *hw = &adapter->hw; + + if (adapter->hw.mac.type == ixgbe_mac_82599EB && + adapter->flags2 & IXGBE_FLAG2_AUTO_DISABLE_VF) { + adapter->vfinfo[vf].primary_abort_count++; + if (adapter->vfinfo[vf].primary_abort_count == + IXGBE_PRIMARY_ABORT_LIMIT) { + ixgbe_set_vf_link_state(adapter, vf, + IFLA_VF_LINK_STATE_DISABLE); + adapter->vfinfo[vf].primary_abort_count = 0; + + e_info(drv, + "Malicious Driver Detection event detected on PF %d VF %d MAC: %pM mdd-disable-vf=on", + hw->bus.func, vf, + adapter->vfinfo[vf].vf_mac_addresses); + } + } +} + static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -7596,8 +7617,10 @@ static void ixgbe_check_for_bad_vf(struct ixgbe_adapter *adapter) continue; pci_read_config_word(vfdev, PCI_STATUS, &status_reg); if (status_reg != IXGBE_FAILED_READ_CFG_WORD && - status_reg & PCI_STATUS_REC_MASTER_ABORT) + status_reg & PCI_STATUS_REC_MASTER_ABORT) { + ixgbe_bad_vf_abort(adapter, vf); pcie_flr(vfdev); + } } } @@ -10602,6 +10625,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) goto err_sw_init; + if (adapter->hw.mac.type == ixgbe_mac_82599EB) + adapter->flags2 |= IXGBE_FLAG2_AUTO_DISABLE_VF; + /* Make sure the SWFW semaphore is in a valid state */ if (hw->mac.ops.init_swfw_sync) hw->mac.ops.init_swfw_sync(hw); -- Gitee From 15f6c3120c2989460409c4967dd8d776af1896e1 Mon Sep 17 00:00:00 2001 From: Slawomir Mrozowicz Date: Mon, 9 May 2022 03:38:45 +0000 Subject: [PATCH 317/340] ixgbevf: add disable link state mainline inclusion from mainline-v5.18-rc1 commit 443ebdd68b443ea0798c883e8aabf10d75268e92 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I532H9 CVE: CVE-2021-33061 -------------------------------- Add possibility to disable link state if it is administratively disabled in PF. It is part of the general functionality that allows the PF driver to control the state of the virtual link VF devices. 
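Under the hood this is a two-word mailbox exchange. The sketch below condenses the VF-side query that the vf.c hunk further down adds as ixgbevf_get_link_state_vf(); the real function keeps a separate ret_val variable but, exactly as here, maps both a write failure and a NACKed reply to IXGBE_ERR_MBX:

static s32 get_link_state_sketch(struct ixgbe_hw *hw, bool *link_state)
{
        u32 msgbuf[2];
        s32 err;

        msgbuf[0] = IXGBE_VF_GET_LINK_STATE;    /* opcode 0x10, added to mbx.h below */
        msgbuf[1] = 0;

        /* send the request and read the PF's reply back into msgbuf */
        err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2);
        if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_NACK))
                return IXGBE_ERR_MBX;           /* old PF API, or query refused */

        *link_state = msgbuf[1];        /* nonzero: link administratively enabled */
        return 0;
}

The result lands in adapter->link_state, and the watchdog only reports carrier when both the physical link and this administrative state are up, which is how a VF disabled from the PF side shows link down even while the port itself has link.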
Signed-off-by: Slawomir Mrozowicz Tested-by: Konrad Jankowski Signed-off-by: Tony Nguyen Conflicts: drivers/net/ethernet/intel/ixgbevf/mbx.h drivers/net/ethernet/intel/ixgbevf/vf.c Signed-off-by: Ziyang Xuan Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/net/ethernet/intel/ixgbevf/ixgbevf.h | 2 + .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 11 ++++- drivers/net/ethernet/intel/ixgbevf/mbx.h | 2 + drivers/net/ethernet/intel/ixgbevf/vf.c | 42 +++++++++++++++++++ drivers/net/ethernet/intel/ixgbevf/vf.h | 1 + 5 files changed, 57 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 56a1031dcc07..469df8a242e8 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -383,6 +383,8 @@ struct ixgbevf_adapter { u32 *rss_key; u8 rss_indir_tbl[IXGBEVF_X550_VFRETA_SIZE]; u32 flags; + bool link_state; + #define IXGBEVF_FLAGS_LEGACY_RX BIT(1) }; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 25785fee5422..450c0a95d36d 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2263,7 +2263,9 @@ static void ixgbevf_negotiate_api(struct ixgbevf_adapter *adapter) static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; struct ixgbe_hw *hw = &adapter->hw; + bool state; ixgbevf_configure_msix(adapter); @@ -2276,6 +2278,11 @@ static void ixgbevf_up_complete(struct ixgbevf_adapter *adapter) spin_unlock_bh(&adapter->mbx_lock); + state = adapter->link_state; + hw->mac.ops.get_link_state(hw, &adapter->link_state); + if (state && state != adapter->link_state) + dev_info(&pdev->dev, "VF is administratively disabled\n"); + smp_mb__before_atomic(); clear_bit(__IXGBEVF_DOWN, &adapter->state); ixgbevf_napi_enable_all(adapter); @@ -3045,6 +3052,8 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) adapter->tx_ring_count = IXGBEVF_DEFAULT_TXD; adapter->rx_ring_count = IXGBEVF_DEFAULT_RXD; + adapter->link_state = true; + set_bit(__IXGBEVF_DOWN, &adapter->state); return 0; @@ -3277,7 +3286,7 @@ static void ixgbevf_watchdog_subtask(struct ixgbevf_adapter *adapter) ixgbevf_watchdog_update_link(adapter); - if (adapter->link_up) + if (adapter->link_up && adapter->link_state) ixgbevf_watchdog_link_is_up(adapter); else ixgbevf_watchdog_link_is_down(adapter); diff --git a/drivers/net/ethernet/intel/ixgbevf/mbx.h b/drivers/net/ethernet/intel/ixgbevf/mbx.h index bfd9ae150808..464841e21315 100644 --- a/drivers/net/ethernet/intel/ixgbevf/mbx.h +++ b/drivers/net/ethernet/intel/ixgbevf/mbx.h @@ -92,6 +92,8 @@ enum ixgbe_pfvf_api_rev { #define IXGBE_VF_UPDATE_XCAST_MODE 0x0c +#define IXGBE_VF_GET_LINK_STATE 0x10 /* get vf link state */ + /* length of permanent address message returned from PF */ #define IXGBE_VF_PERMADDR_MSG_LEN 4 /* word in permanent address message with the current multicast type */ diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 7597d099c584..c96a4c3ca075 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -570,6 +570,46 @@ static s32 ixgbevf_hv_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) return -EOPNOTSUPP; } +/** + * ixgbevf_get_link_state_vf - Get VF link state from PF + * @hw: 
pointer to the HW structure + * @link_state: link state storage + * + * Returns state of the operation error or success. + */ +static s32 ixgbevf_get_link_state_vf(struct ixgbe_hw *hw, bool *link_state) +{ + u32 msgbuf[2]; + s32 ret_val; + s32 err; + + msgbuf[0] = IXGBE_VF_GET_LINK_STATE; + msgbuf[1] = 0x0; + + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + + if (err || (msgbuf[0] & IXGBE_VT_MSGTYPE_NACK)) { + ret_val = IXGBE_ERR_MBX; + } else { + ret_val = 0; + *link_state = msgbuf[1]; + } + + return ret_val; +} + +/** + * ixgbevf_hv_get_link_state_vf - * Hyper-V variant - just a stub. + * @hw: unused + * @link_state: unused + * + * Hyper-V variant; there is no mailbox communication. + */ +static s32 ixgbevf_hv_get_link_state_vf(struct ixgbe_hw *hw, bool *link_state) +{ + return -EOPNOTSUPP; +} + /** * ixgbevf_set_vfta_vf - Set/Unset VLAN filter table address * @hw: pointer to the HW structure @@ -946,6 +986,7 @@ static const struct ixgbe_mac_operations ixgbevf_mac_ops = { .set_rar = ixgbevf_set_rar_vf, .update_mc_addr_list = ixgbevf_update_mc_addr_list_vf, .update_xcast_mode = ixgbevf_update_xcast_mode, + .get_link_state = ixgbevf_get_link_state_vf, .set_uc_addr = ixgbevf_set_uc_addr_vf, .set_vfta = ixgbevf_set_vfta_vf, .set_rlpml = ixgbevf_set_rlpml_vf, @@ -963,6 +1004,7 @@ static const struct ixgbe_mac_operations ixgbevf_hv_mac_ops = { .set_rar = ixgbevf_hv_set_rar_vf, .update_mc_addr_list = ixgbevf_hv_update_mc_addr_list_vf, .update_xcast_mode = ixgbevf_hv_update_xcast_mode, + .get_link_state = ixgbevf_hv_get_link_state_vf, .set_uc_addr = ixgbevf_hv_set_uc_addr_vf, .set_vfta = ixgbevf_hv_set_vfta_vf, .set_rlpml = ixgbevf_hv_set_rlpml_vf, diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.h b/drivers/net/ethernet/intel/ixgbevf/vf.h index d1e9e306653b..45d9269218db 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.h +++ b/drivers/net/ethernet/intel/ixgbevf/vf.h @@ -42,6 +42,7 @@ struct ixgbe_mac_operations { s32 (*init_rx_addrs)(struct ixgbe_hw *); s32 (*update_mc_addr_list)(struct ixgbe_hw *, struct net_device *); s32 (*update_xcast_mode)(struct ixgbe_hw *, int); + s32 (*get_link_state)(struct ixgbe_hw *hw, bool *link_state); s32 (*enable_mc)(struct ixgbe_hw *); s32 (*disable_mc)(struct ixgbe_hw *); s32 (*clear_vfta)(struct ixgbe_hw *); -- Gitee From 857d0cd57976451a339959d6509529f11c1548c2 Mon Sep 17 00:00:00 2001 From: Chen Wandun Date: Mon, 9 May 2022 11:30:08 +0000 Subject: [PATCH 318/340] mm: refactor the reclaim thread of page cache from per-cpu to per-node hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA ---------------------------------------------- In feature page cache limit, the reclaim work is per-cpu, it is unnecessary and will result some performance degradation in some case, such as storage (some cpus are devote for dead loop, no other works should be excuted on these cpus). In order to solve this, change the reclaim work thread from per-cpu to per-node and meanwhile dont bind these work thread to any cpus. 
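The structural change is easiest to see in isolation: one plain work item per node instead of a delayed work per CPU, dispatched on the unbound workqueue so the scheduler decides placement. A minimal sketch using the same primitives as the vmscan.c diff below (array and function names here are illustrative; the patch itself uses vmscan_works[] and shrink_shepherd()):

static struct work_struct reclaim_works[MAX_NUMNODES];

static void shepherd_pass_sketch(void)
{
        int node;

        /* one page-cache reclaim worker per online node, bound to no CPU */
        for_each_online_node(node)
                if (!work_pending(&reclaim_works[node]))
                        queue_work_node(node, system_unbound_wq,
                                        &reclaim_works[node]);
}

To compensate for the lower worker count, the per-pass target in __shrink_page_cache() is scaled by nr_cpus_node(), so one node-wide pass reclaims roughly what the old per-CPU workers did in aggregate.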
Signed-off-by: Chen Wandun Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/vmscan.c | 61 +++++++++++++++-------------------------------------- 1 file changed, 17 insertions(+), 44 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index d59b22a7ba9a..42f24473756b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -185,7 +185,7 @@ int vm_cache_reclaim_weight __read_mostly; int vm_cache_reclaim_weight_min; int vm_cache_reclaim_weight_max; int vm_cache_reclaim_enable; -static DEFINE_PER_CPU(struct delayed_work, vmscan_work); +static struct work_struct vmscan_works[MAX_NUMNODES]; #endif static LIST_HEAD(shrinker_list); @@ -3972,8 +3972,8 @@ static unsigned long __shrink_page_cache(gfp_t mask) .gfp_mask = current_gfp_context(mask), .reclaim_idx = gfp_zone(mask), .may_writepage = !laptop_mode, - .nr_to_reclaim = SWAP_CLUSTER_MAX * - (unsigned long)vm_cache_reclaim_weight, + .nr_to_reclaim = SWAP_CLUSTER_MAX * nr_cpus_node(numa_node_id()) * + vm_cache_reclaim_weight, .may_unmap = 1, .may_swap = mem_reliable_is_enabled() ? 0 : 1, .no_shrink_slab = mem_reliable_is_enabled() ? 0 : 1, @@ -3997,26 +3997,21 @@ static DECLARE_DEFERRABLE_WORK(shepherd, shrink_shepherd); static void shrink_shepherd(struct work_struct *w) { - int cpu; - get_online_cpus(); + int node; - for_each_online_cpu(cpu) { - struct delayed_work *work = &per_cpu(vmscan_work, cpu); - - if (!delayed_work_pending(work) && vm_cache_reclaim_enable) - queue_delayed_work_on(cpu, system_wq, work, 0); + for_each_online_node(node) { + if (!work_pending(&vmscan_works[node]) && vm_cache_reclaim_enable) + queue_work_node(node, system_unbound_wq, &vmscan_works[node]); } - put_online_cpus(); - /* we want all kernel thread to stop */ if (vm_cache_reclaim_enable) { if (vm_cache_reclaim_s == 0) - schedule_delayed_work(&shepherd, + queue_delayed_work(system_unbound_wq, &shepherd, round_jiffies_relative(120 * HZ)); else - schedule_delayed_work(&shepherd, + queue_delayed_work(system_unbound_wq, &shepherd, round_jiffies_relative((unsigned long) vm_cache_reclaim_s * HZ)); } @@ -4024,15 +4019,12 @@ static void shrink_shepherd(struct work_struct *w) static void shrink_shepherd_timer(void) { - int cpu; - - for_each_possible_cpu(cpu) { - struct delayed_work *work = &per_cpu(vmscan_work, cpu); + int i; - INIT_DEFERRABLE_WORK(work, shrink_page_cache_work); - } + for (i = 0; i < MAX_NUMNODES; i++) + INIT_WORK(&vmscan_works[i], shrink_page_cache_work); - schedule_delayed_work(&shepherd, + queue_delayed_work(system_unbound_wq, &shepherd, round_jiffies_relative((unsigned long)vm_cache_reclaim_s * HZ)); } @@ -4048,9 +4040,6 @@ unsigned long shrink_page_cache(gfp_t mask) static void shrink_page_cache_work(struct work_struct *w) { - struct delayed_work *work = to_delayed_work(w); - unsigned long nr_pages; - /* * if vm_cache_reclaim_enable or vm_cache_reclaim_s is zero, * we do not shrink page cache again. 
@@ -4063,10 +4052,7 @@ static void shrink_page_cache_work(struct work_struct *w) return; /* It should wait more time if we hardly reclaim the page cache */ - nr_pages = shrink_page_cache(GFP_KERNEL); - if ((nr_pages < SWAP_CLUSTER_MAX) && vm_cache_reclaim_enable) - queue_delayed_work_on(smp_processor_id(), system_wq, work, - round_jiffies_relative((vm_cache_reclaim_s + 120) * HZ)); + shrink_page_cache(GFP_KERNEL); } static void shrink_page_cache_init(void) @@ -4088,13 +4074,6 @@ static void shrink_page_cache_init(void) shrink_shepherd_timer(); } -static int kswapd_cpu_down_prep(unsigned int cpu) -{ - cancel_delayed_work_sync(&per_cpu(vmscan_work, cpu)); - - return 0; -} - int cache_reclaim_enable_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { @@ -4105,8 +4084,8 @@ int cache_reclaim_enable_handler(struct ctl_table *table, int write, return ret; if (write) - schedule_delayed_work(&shepherd, round_jiffies_relative( - (unsigned long)vm_cache_reclaim_s * HZ)); + queue_delayed_work(system_unbound_wq, &shepherd, + round_jiffies_relative((unsigned long)vm_cache_reclaim_s * HZ)); return 0; } @@ -4121,7 +4100,7 @@ int cache_reclaim_sysctl_handler(struct ctl_table *table, int write, return ret; if (write) - mod_delayed_work(system_wq, &shepherd, + mod_delayed_work(system_unbound_wq, &shepherd, round_jiffies_relative( (unsigned long)vm_cache_reclaim_s * HZ)); @@ -4194,15 +4173,9 @@ static int __init kswapd_init(void) swap_setup(); for_each_node_state(nid, N_MEMORY) kswapd_run(nid); -#ifdef CONFIG_SHRINK_PAGECACHE - ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, - "mm/vmscan:online", kswapd_cpu_online, - kswapd_cpu_down_prep); -#else ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "mm/vmscan:online", kswapd_cpu_online, NULL); -#endif WARN_ON(ret < 0); #ifdef CONFIG_SHRINK_PAGECACHE shrink_page_cache_init(); -- Gitee From 1fbc8ee56564350edf4d89464cca29d0a0e4c92a Mon Sep 17 00:00:00 2001 From: Ma Wupeng Date: Mon, 9 May 2022 11:30:09 +0000 Subject: [PATCH 319/340] mm: Update reliable flag in memory allocaion for reliable task only in task context hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4SK3S CVE: NA -------------------------------- Since interrupt may occupy reliable task's context and its current->flags will have PF_RELIABLE and this will lead to redirect it's memory allocation to mirrored region. Update reliable task's gfp flag can only happen in normal task context by checking in_task(). Signed-off-by: Ma Wupeng Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/page_alloc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 38f2a84d3224..3ae924633e2c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4625,6 +4625,9 @@ static inline void prepare_before_alloc(gfp_t *gfp_mask) return; } + if (!in_task()) + return; + if (is_global_init(current) || (current->flags & PF_RELIABLE)) *gfp_mask |= ___GFP_RELIABILITY; } -- Gitee From 0ddc1e480f6deb139b4b2236cbdf7cafbbc0548e Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 12 May 2022 14:32:44 +0000 Subject: [PATCH 320/340] ext4: fix use-after-free in ext4_search_dir mainline inclusion from mainline-v5.18-rc4 commit c186f0887fe7061a35cebef024550ec33ef8fbd8 category: bugfix bugzilla: 186477, https://gitee.com/openeuler/kernel/issues/I55UHT CVE: NA ------------------------------------------------- We got issue as follows: EXT4-fs (loop0): mounted filesystem without journal. 
Opts: ,errors=continue ================================================================== BUG: KASAN: use-after-free in ext4_search_dir fs/ext4/namei.c:1394 [inline] BUG: KASAN: use-after-free in search_dirblock fs/ext4/namei.c:1199 [inline] BUG: KASAN: use-after-free in __ext4_find_entry+0xdca/0x1210 fs/ext4/namei.c:1553 Read of size 1 at addr ffff8881317c3005 by task syz-executor117/2331 CPU: 1 PID: 2331 Comm: syz-executor117 Not tainted 5.10.0+ #1 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.14.0-0-g155821a1990b-prebuilt.qemu.org 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:83 [inline] dump_stack+0x144/0x187 lib/dump_stack.c:124 print_address_description+0x7d/0x630 mm/kasan/report.c:387 __kasan_report+0x132/0x190 mm/kasan/report.c:547 kasan_report+0x47/0x60 mm/kasan/report.c:564 ext4_search_dir fs/ext4/namei.c:1394 [inline] search_dirblock fs/ext4/namei.c:1199 [inline] __ext4_find_entry+0xdca/0x1210 fs/ext4/namei.c:1553 ext4_lookup_entry fs/ext4/namei.c:1622 [inline] ext4_lookup+0xb8/0x3a0 fs/ext4/namei.c:1690 __lookup_hash+0xc5/0x190 fs/namei.c:1451 do_rmdir+0x19e/0x310 fs/namei.c:3760 do_syscall_64+0x33/0x40 arch/x86/entry/common.c:46 entry_SYSCALL_64_after_hwframe+0x44/0xa9 RIP: 0033:0x445e59 Code: 4d c7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 1b c7 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fff2277fac8 EFLAGS: 00000246 ORIG_RAX: 0000000000000054 RAX: ffffffffffffffda RBX: 0000000000400280 RCX: 0000000000445e59 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000200000c0 RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000002 R10: 00007fff2277f990 R11: 0000000000000246 R12: 0000000000000000 R13: 431bde82d7b634db R14: 0000000000000000 R15: 0000000000000000 The buggy address belongs to the page: page:0000000048cd3304 refcount:0 mapcount:0 mapping:0000000000000000 index:0x1 pfn:0x1317c3 flags: 0x200000000000000() raw: 0200000000000000 ffffea0004526588 ffffea0004528088 0000000000000000 raw: 0000000000000001 0000000000000000 00000000ffffffff 0000000000000000 page dumped because: kasan: bad access detected Memory state around the buggy address: ffff8881317c2f00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ffff8881317c2f80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 >ffff8881317c3000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff8881317c3080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff8881317c3100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== ext4_search_dir: ... de = (struct ext4_dir_entry_2 *)search_buf; dlimit = search_buf + buf_size; while ((char *) de < dlimit) { ... if ((char *) de + de->name_len <= dlimit && ext4_match(dir, fname, de)) { ... } ... de_len = ext4_rec_len_from_disk(de->rec_len, dir->i_sb->s_blocksize); if (de_len <= 0) return -1; offset += de_len; de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); } Assume: de=0xffff8881317c2fff dlimit=0x0xffff8881317c3000 If read 'de->name_len' which address is 0xffff8881317c3005, obviously is out of range, then will trigger use-after-free. To solve this issue, 'dlimit' must reserve 8 bytes, as we will read 'de->name_len' to judge if '(char *) de + de->name_len' out of range. 
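The 8 bytes being reserved are exactly the fixed-size header of an on-disk directory entry, i.e. everything that precedes the variable-length name. The layout (as declared in fs/ext4/ext4.h) makes the arithmetic behind the new EXT4_BASE_DIR_LEN macro in the following diff concrete:

struct ext4_dir_entry_2 {
        __le32  inode;                  /* 4 bytes: inode number          */
        __le16  rec_len;                /* 2 bytes: length of this entry  */
        __u8    name_len;               /* 1 byte:  length of the name    */
        __u8    file_type;              /* 1 byte                         */
        char    name[EXT4_NAME_LEN];    /* variable, up to 255 bytes      */
};

/*
 * EXT4_BASE_DIR_LEN = sizeof(struct ext4_dir_entry_2) - EXT4_NAME_LEN = 8.
 * Stopping the scan at "dlimit - EXT4_BASE_DIR_LEN" therefore guarantees
 * that the whole fixed header - in particular de->name_len - is readable
 * before it is dereferenced, which the old "(char *)de < dlimit" check did
 * not ensure for an entry starting within the last few bytes of the block.
 */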
Signed-off-by: Ye Bin Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20220324064816.1209985-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o Cc: stable@kernel.org conflicts: fs/ext4/namei.c Signed-off-by: ChenXiaoSong Reviewed-by: Zhang Yi Reviewed-by: yebin Signed-off-by: Yongqiang Liu --- fs/ext4/ext4.h | 4 ++++ fs/ext4/namei.c | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 6787d12d5abf..16e2b719d47c 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1985,6 +1985,10 @@ static inline int ext4_forced_shutdown(struct ext4_sb_info *sbi) * Structure of a directory entry */ #define EXT4_NAME_LEN 255 +/* + * Base length of the ext4 directory entry excluding the name length + */ +#define EXT4_BASE_DIR_LEN (sizeof(struct ext4_dir_entry_2) - EXT4_NAME_LEN) struct ext4_dir_entry { __le32 inode; /* Inode number */ diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 867deea2fbdb..e3e57eab371f 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1304,10 +1304,10 @@ int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size, de = (struct ext4_dir_entry_2 *)search_buf; dlimit = search_buf + buf_size; - while ((char *) de < dlimit) { + while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ - if ((char *) de + de->name_len <= dlimit && + if (de->name + de->name_len <= dlimit && ext4_match(fname, de)) { /* found a match - just to be sure, do * a full check */ -- Gitee From e2aee86820958e18ca7e7dabf920700cdc48607c Mon Sep 17 00:00:00 2001 From: Ritesh Harjani Date: Thu, 12 May 2022 14:32:45 +0000 Subject: [PATCH 321/340] ext4: unregister sysfs path before destroying jbd2 journal mainline inclusion from mainline-v5.7-rc1 commit 5e47868fb94b63c7068dcfd2469d99fba06bae9d category: bugfix bugzilla: 186675, https://gitee.com/openeuler/kernel/issues/I55TUC CVE: NA ------------------------------------------------- Call ext4_unregister_sysfs(), before destroying jbd2 journal, since below might cause, NULL pointer dereference issue. This got reported with LTP tests. ext4_put_super() cat /sys/fs/ext4/loop2/journal_task | ext4_attr_show(); ext4_jbd2_journal_destroy(); | | journal_task_show() | | | task_pid_vnr(NULL); sbi->s_journal = NULL; Signed-off-by: Ritesh Harjani Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20200318061301.4320-1-riteshh@linux.ibm.com Signed-off-by: Theodore Ts'o conflicts: fs/ext4/super.c Signed-off-by: ChenXiaoSong Reviewed-by: yebin Reviewed-by: Zhang Yi Signed-off-by: Yongqiang Liu --- fs/ext4/super.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f961f7d94a70..eb70be800507 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1160,6 +1160,13 @@ static void ext4_put_super(struct super_block *sb) flush_work(&sbi->s_error_work); destroy_workqueue(sbi->rsv_conversion_wq); + /* + * Unregister sysfs before destroying jbd2 journal. 
+ * Since we could still access attr_journal_task attribute via sysfs + * path which could have sbi->s_journal->j_task as NULL + */ + ext4_unregister_sysfs(sb); + if (sbi->s_journal) { aborted = is_journal_aborted(sbi->s_journal); err = jbd2_journal_destroy(sbi->s_journal); @@ -1169,7 +1176,6 @@ static void ext4_put_super(struct super_block *sb) } } - ext4_unregister_sysfs(sb); ext4_es_unregister_shrinker(sbi); del_timer_sync(&sbi->s_err_report); ext4_release_system_zone(sb); -- Gitee From 84597fe1ff2df016d4d99f36a3f0bd9e7c6e823f Mon Sep 17 00:00:00 2001 From: Ye Bin Date: Thu, 12 May 2022 14:32:46 +0000 Subject: [PATCH 322/340] ext4: fix bug_on in start_this_handle during umount filesystem mainline inclusion from mainline-v5.18-rc4 commit b98535d091795a79336f520b0708457aacf55c67 category: bugfix bugzilla: 186675, https://gitee.com/openeuler/kernel/issues/I55TUC CVE: NA ------------------------------------------------- We got issue as follows: ------------[ cut here ]------------ kernel BUG at fs/jbd2/transaction.c:389! invalid opcode: 0000 [#1] PREEMPT SMP KASAN PTI CPU: 9 PID: 131 Comm: kworker/9:1 Not tainted 5.17.0-862.14.0.6.x86_64-00001-g23f87daf7d74-dirty #197 Workqueue: events flush_stashed_error_work RIP: 0010:start_this_handle+0x41c/0x1160 RSP: 0018:ffff888106b47c20 EFLAGS: 00010202 RAX: ffffed10251b8400 RBX: ffff888128dc204c RCX: ffffffffb52972ac RDX: 0000000000000200 RSI: 0000000000000004 RDI: ffff888128dc2050 RBP: 0000000000000039 R08: 0000000000000001 R09: ffffed10251b840a R10: ffff888128dc204f R11: ffffed10251b8409 R12: ffff888116d78000 R13: 0000000000000000 R14: dffffc0000000000 R15: ffff888128dc2000 FS: 0000000000000000(0000) GS:ffff88839d680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000001620068 CR3: 0000000376c0e000 CR4: 00000000000006e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: jbd2__journal_start+0x38a/0x790 jbd2_journal_start+0x19/0x20 flush_stashed_error_work+0x110/0x2b3 process_one_work+0x688/0x1080 worker_thread+0x8b/0xc50 kthread+0x26f/0x310 ret_from_fork+0x22/0x30 Modules linked in: ---[ end trace 0000000000000000 ]--- Above issue may happen as follows: umount read procfs error_work ext4_put_super flush_work(&sbi->s_error_work); ext4_mb_seq_groups_show ext4_mb_load_buddy_gfp ext4_mb_init_group ext4_mb_init_cache ext4_read_block_bitmap_nowait ext4_validate_block_bitmap ext4_error ext4_handle_error schedule_work(&EXT4_SB(sb)->s_error_work); ext4_unregister_sysfs(sb); jbd2_journal_destroy(sbi->s_journal); journal_kill_thread journal->j_flags |= JBD2_UNMOUNT; flush_stashed_error_work jbd2_journal_start start_this_handle BUG_ON(journal->j_flags & JBD2_UNMOUNT); To solve this issue, we call 'ext4_unregister_sysfs() before flushing s_error_work in ext4_put_super(). 
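The fix is purely an ordering change in ext4_put_super(); a condensed view of the teardown sequence established by the diff below (error handling and the aborted-journal bookkeeping are kept in the real code):

        ext4_unregister_sysfs(sb);      /* remove the /sys and /proc/fs/ext4/<dev>
                                         * entries first, so no reader of mb_groups
                                         * can queue s_error_work from this point on */
        ext4_unregister_li_request(sb);
        ext4_quota_off_umount(sb);
        flush_work(&sbi->s_error_work); /* drain error work that is already queued */
        destroy_workqueue(sbi->rsv_conversion_wq);
        jbd2_journal_destroy(sbi->s_journal);   /* sets JBD2_UNMOUNT; any later
                                                 * start_this_handle() would hit the
                                                 * BUG_ON shown in the trace above */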
Signed-off-by: Ye Bin Reviewed-by: Jan Kara Reviewed-by: Ritesh Harjani Link: https://lore.kernel.org/r/20220322012419.725457-1-yebin10@huawei.com Signed-off-by: Theodore Ts'o conflicts: fs/ext4/super.c Signed-off-by: ChenXiaoSong Reviewed-by: Zhang Yi Reviewed-by: yebin Signed-off-by: Yongqiang Liu --- fs/ext4/super.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index eb70be800507..4d9de6216fe4 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1154,19 +1154,24 @@ static void ext4_put_super(struct super_block *sb) int aborted = 0; int i, err; - ext4_unregister_li_request(sb); - ext4_quota_off_umount(sb); - - flush_work(&sbi->s_error_work); - destroy_workqueue(sbi->rsv_conversion_wq); - /* * Unregister sysfs before destroying jbd2 journal. * Since we could still access attr_journal_task attribute via sysfs * path which could have sbi->s_journal->j_task as NULL + * Unregister sysfs before flush sbi->s_error_work. + * Since user may read /proc/fs/ext4/xx/mb_groups during umount, If + * read metadata verify failed then will queue error work. + * flush_stashed_error_work will call start_this_handle may trigger + * BUG_ON. */ ext4_unregister_sysfs(sb); + ext4_unregister_li_request(sb); + ext4_quota_off_umount(sb); + + flush_work(&sbi->s_error_work); + destroy_workqueue(sbi->rsv_conversion_wq); + if (sbi->s_journal) { aborted = is_journal_aborted(sbi->s_journal); err = jbd2_journal_destroy(sbi->s_journal); -- Gitee From 5ba8a3eb026f02aa75f4a22099250bfd4db659cb Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 13 May 2022 02:20:37 +0000 Subject: [PATCH 323/340] ax25: add refcount in ax25_dev to avoid UAF bugs mainline inclusion from mainline-v5.17-rc3 commit d01ffb9eee4af165d83b08dd73ebdf9fe94a519b category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5783H CVE: CVE-2022-1204 -------------------------------- If we dereference ax25_dev after we call kfree(ax25_dev) in ax25_dev_device_down(), it will lead to concurrency UAF bugs. There are eight syscall functions suffer from UAF bugs, include ax25_bind(), ax25_release(), ax25_connect(), ax25_ioctl(), ax25_getname(), ax25_sendmsg(), ax25_getsockopt() and ax25_info_show(). One of the concurrency UAF can be shown as below: (USE) | (FREE) | ax25_device_event | ax25_dev_device_down ax25_bind | ... ... | kfree(ax25_dev) ax25_fillin_cb() | ... ax25_fillin_cb_from_dev() | ... | The root cause of UAF bugs is that kfree(ax25_dev) in ax25_dev_device_down() is not protected by any locks. When ax25_dev, which there are still pointers point to, is released, the concurrency UAF bug will happen. This patch introduces refcount into ax25_dev in order to guarantee that there are no pointers point to it when ax25_dev is released. Signed-off-by: Duoming Zhou Signed-off-by: David S. 
Miller conflict: net/ax25/af_ax25.c net/ax25/ax25_dev.c Signed-off-by: Lu Wei Reviewed-by: Yue Haibing Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- include/net/ax25.h | 10 ++++++++++ net/ax25/af_ax25.c | 2 ++ net/ax25/ax25_dev.c | 12 ++++++++++-- net/ax25/ax25_route.c | 3 +++ 4 files changed, 25 insertions(+), 2 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index 8b7eb46ad72d..d81bfb674906 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -236,6 +236,7 @@ typedef struct ax25_dev { #if defined(CONFIG_AX25_DAMA_SLAVE) || defined(CONFIG_AX25_DAMA_MASTER) ax25_dama_info dama; #endif + refcount_t refcount; } ax25_dev; typedef struct ax25_cb { @@ -290,6 +291,15 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } +#define ax25_dev_hold(__ax25_dev) \ + refcount_inc(&((__ax25_dev)->refcount)) + +static __inline__ void ax25_dev_put(ax25_dev *ax25_dev) +{ + if (refcount_dec_and_test(&ax25_dev->refcount)) { + kfree(ax25_dev); + } +} static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev) { skb->dev = dev; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 2b0bee2dbbb7..46c6065e377b 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -102,6 +102,7 @@ static void ax25_kill_by_device(struct net_device *dev) lock_sock(sk); ax25_disconnect(s, ENETUNREACH); s->ax25_dev = NULL; + ax25_dev_put(ax25_dev); release_sock(sk); spin_lock_bh(&ax25_list_lock); sock_put(sk); @@ -449,6 +450,7 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) } out_put: + ax25_dev_put(ax25_dev); ax25_cb_put(ax25); return ret; diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index d92195cd7834..76d105390706 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -40,6 +40,7 @@ ax25_dev *ax25_addr_ax25dev(ax25_address *addr) for (ax25_dev = ax25_dev_list; ax25_dev != NULL; ax25_dev = ax25_dev->next) if (ax25cmp(addr, (ax25_address *)ax25_dev->dev->dev_addr) == 0) { res = ax25_dev; + ax25_dev_hold(ax25_dev); } spin_unlock_bh(&ax25_dev_lock); @@ -59,6 +60,7 @@ void ax25_dev_device_up(struct net_device *dev) return; } + refcount_set(&ax25_dev->refcount, 1); dev->ax25_ptr = ax25_dev; ax25_dev->dev = dev; dev_hold(dev); @@ -86,6 +88,7 @@ void ax25_dev_device_up(struct net_device *dev) spin_lock_bh(&ax25_dev_lock); ax25_dev->next = ax25_dev_list; ax25_dev_list = ax25_dev; + ax25_dev_hold(ax25_dev); spin_unlock_bh(&ax25_dev_lock); ax25_register_dev_sysctl(ax25_dev); @@ -115,20 +118,22 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; + ax25_dev_put(ax25_dev); spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; dev_put(dev); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } while (s != NULL && s->next != NULL) { if (s->next == ax25_dev) { s->next = ax25_dev->next; + ax25_dev_put(ax25_dev); spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; dev_put(dev); - kfree(ax25_dev); + ax25_dev_put(ax25_dev); return; } @@ -136,6 +141,7 @@ void ax25_dev_device_down(struct net_device *dev) } spin_unlock_bh(&ax25_dev_lock); dev->ax25_ptr = NULL; + ax25_dev_put(ax25_dev); } int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) @@ -152,6 +158,7 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) if (ax25_dev->forward != NULL) return -EINVAL; ax25_dev->forward = fwd_dev->dev; + ax25_dev_put(fwd_dev); break; case SIOCAX25DELFWD: @@ -164,6 +171,7 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) 
return -EINVAL; } + ax25_dev_put(ax25_dev); return 0; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index 66d54fc11831..cd380767245c 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -119,6 +119,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->dev = ax25_dev->dev; ax25_rt->digipeat = NULL; ax25_rt->ip_mode = ' '; + ax25_dev_put(ax25_dev); if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); @@ -175,6 +176,7 @@ static int ax25_rt_del(struct ax25_routes_struct *route) } } } + ax25_dev_put(ax25_dev); write_unlock_bh(&ax25_route_lock); return 0; @@ -217,6 +219,7 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) } out: + ax25_dev_put(ax25_dev); write_unlock_bh(&ax25_route_lock); return err; } -- Gitee From 8726f0cd968784ff0c3d3ccd61a1e81faf14c09c Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 13 May 2022 02:20:38 +0000 Subject: [PATCH 324/340] ax25: fix reference count leaks of ax25_dev mainline inclusion from mainline-v5.17-rc3 commit 87563a043cef044fed5db7967a75741cc16ad2b1 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5783H CVE: CVE-2022-1204 -------------------------------- The previous commit d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") introduces refcount into ax25_dev, but there are reference leak paths in ax25_ctl_ioctl(), ax25_fwd_ioctl(), ax25_rt_add(), ax25_rt_del() and ax25_rt_opt(). This patch uses ax25_dev_put() and adjusts the position of ax25_addr_ax25dev() to fix reference cout leaks of ax25_dev. Fixes: d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") Signed-off-by: Duoming Zhou Reviewed-by: Dan Carpenter Link: https://lore.kernel.org/r/20220203150811.42256-1-duoming@zju.edu.cn Signed-off-by: Jakub Kicinski conflict: net/ax25/ax25_dev.c Signed-off-by: Lu Wei Reviewed-by: Yue Haibing Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- include/net/ax25.h | 8 +++++--- net/ax25/af_ax25.c | 12 ++++++++---- net/ax25/ax25_dev.c | 24 +++++++++++++++++------- net/ax25/ax25_route.c | 16 +++++++++++----- 4 files changed, 41 insertions(+), 19 deletions(-) diff --git a/include/net/ax25.h b/include/net/ax25.h index d81bfb674906..aadff553e4b7 100644 --- a/include/net/ax25.h +++ b/include/net/ax25.h @@ -291,10 +291,12 @@ static __inline__ void ax25_cb_put(ax25_cb *ax25) } } -#define ax25_dev_hold(__ax25_dev) \ - refcount_inc(&((__ax25_dev)->refcount)) +static inline void ax25_dev_hold(ax25_dev *ax25_dev) +{ + refcount_inc(&ax25_dev->refcount); +} -static __inline__ void ax25_dev_put(ax25_dev *ax25_dev) +static inline void ax25_dev_put(ax25_dev *ax25_dev) { if (refcount_dec_and_test(&ax25_dev->refcount)) { kfree(ax25_dev); diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 46c6065e377b..1fc954d34021 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -369,21 +369,25 @@ static int ax25_ctl_ioctl(const unsigned int cmd, void __user *arg) if (copy_from_user(&ax25_ctl, arg, sizeof(ax25_ctl))) return -EFAULT; - if ((ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr)) == NULL) - return -ENODEV; - if (ax25_ctl.digi_count > AX25_MAX_DIGIS) return -EINVAL; if (ax25_ctl.arg > ULONG_MAX / HZ && ax25_ctl.cmd != AX25_KILL) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&ax25_ctl.port_addr); + if (!ax25_dev) + return -ENODEV; + digi.ndigi = ax25_ctl.digi_count; for (k = 0; k < digi.ndigi; k++) digi.calls[k] = 
ax25_ctl.digi_addr[k]; - if ((ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev)) == NULL) + ax25 = ax25_find_cb(&ax25_ctl.source_addr, &ax25_ctl.dest_addr, &digi, ax25_dev->dev); + if (!ax25) { + ax25_dev_put(ax25_dev); return -ENOTCONN; + } switch (ax25_ctl.cmd) { case AX25_KILL: diff --git a/net/ax25/ax25_dev.c b/net/ax25/ax25_dev.c index 76d105390706..55a611f7239b 100644 --- a/net/ax25/ax25_dev.c +++ b/net/ax25/ax25_dev.c @@ -88,8 +88,8 @@ void ax25_dev_device_up(struct net_device *dev) spin_lock_bh(&ax25_dev_lock); ax25_dev->next = ax25_dev_list; ax25_dev_list = ax25_dev; - ax25_dev_hold(ax25_dev); spin_unlock_bh(&ax25_dev_lock); + ax25_dev_hold(ax25_dev); ax25_register_dev_sysctl(ax25_dev); } @@ -118,8 +118,8 @@ void ax25_dev_device_down(struct net_device *dev) if ((s = ax25_dev_list) == ax25_dev) { ax25_dev_list = s->next; - ax25_dev_put(ax25_dev); spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put(dev); ax25_dev_put(ax25_dev); @@ -129,8 +129,8 @@ void ax25_dev_device_down(struct net_device *dev) while (s != NULL && s->next != NULL) { if (s->next == ax25_dev) { s->next = ax25_dev->next; - ax25_dev_put(ax25_dev); spin_unlock_bh(&ax25_dev_lock); + ax25_dev_put(ax25_dev); dev->ax25_ptr = NULL; dev_put(dev); ax25_dev_put(ax25_dev); @@ -153,25 +153,35 @@ int ax25_fwd_ioctl(unsigned int cmd, struct ax25_fwd_struct *fwd) switch (cmd) { case SIOCAX25ADDFWD: - if ((fwd_dev = ax25_addr_ax25dev(&fwd->port_to)) == NULL) + fwd_dev = ax25_addr_ax25dev(&fwd->port_to); + if (!fwd_dev) { + ax25_dev_put(ax25_dev); return -EINVAL; - if (ax25_dev->forward != NULL) + } + if (ax25_dev->forward) { + ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = fwd_dev->dev; ax25_dev_put(fwd_dev); + ax25_dev_put(ax25_dev); break; case SIOCAX25DELFWD: - if (ax25_dev->forward == NULL) + if (!ax25_dev->forward) { + ax25_dev_put(ax25_dev); return -EINVAL; + } ax25_dev->forward = NULL; + ax25_dev_put(ax25_dev); break; default: + ax25_dev_put(ax25_dev); return -EINVAL; } - ax25_dev_put(ax25_dev); return 0; } diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c index cd380767245c..8f81de88f006 100644 --- a/net/ax25/ax25_route.c +++ b/net/ax25/ax25_route.c @@ -78,11 +78,13 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_dev *ax25_dev; int i; - if ((ax25_dev = ax25_addr_ax25dev(&route->port_addr)) == NULL) - return -EINVAL; if (route->digi_count > AX25_MAX_DIGIS) return -EINVAL; + ax25_dev = ax25_addr_ax25dev(&route->port_addr); + if (!ax25_dev) + return -EINVAL; + write_lock_bh(&ax25_route_lock); ax25_rt = ax25_route_list; @@ -94,6 +96,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -104,6 +107,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) } } write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } ax25_rt = ax25_rt->next; @@ -111,6 +115,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) if ((ax25_rt = kmalloc(sizeof(ax25_route), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return -ENOMEM; } @@ -119,11 +124,11 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->dev = ax25_dev->dev; 
ax25_rt->digipeat = NULL; ax25_rt->ip_mode = ' '; - ax25_dev_put(ax25_dev); if (route->digi_count != 0) { if ((ax25_rt->digipeat = kmalloc(sizeof(ax25_digi), GFP_ATOMIC)) == NULL) { write_unlock_bh(&ax25_route_lock); kfree(ax25_rt); + ax25_dev_put(ax25_dev); return -ENOMEM; } ax25_rt->digipeat->lastrepeat = -1; @@ -136,6 +141,7 @@ static int __must_check ax25_rt_add(struct ax25_routes_struct *route) ax25_rt->next = ax25_route_list; ax25_route_list = ax25_rt; write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -176,8 +182,8 @@ static int ax25_rt_del(struct ax25_routes_struct *route) } } } - ax25_dev_put(ax25_dev); write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return 0; } @@ -219,8 +225,8 @@ static int ax25_rt_opt(struct ax25_route_opt_struct *rt_option) } out: - ax25_dev_put(ax25_dev); write_unlock_bh(&ax25_route_lock); + ax25_dev_put(ax25_dev); return err; } -- Gitee From 42688afbfc39f0d63b9c650d41f1ee7779e932f1 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 13 May 2022 02:20:39 +0000 Subject: [PATCH 325/340] ax25: fix UAF bugs of net_device caused by rebinding operation mainline inclusion from mainline-v5.17-rc4 commit feef318c855a361a1eccd880f33e88c460eb63b4 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5783H CVE: CVE-2022-1204 -------------------------------- The ax25_kill_by_device() will set s->ax25_dev = NULL and call ax25_disconnect() to change states of ax25_cb and sock, if we call ax25_bind() before ax25_kill_by_device(). However, if we call ax25_bind() again between the window of ax25_kill_by_device() and ax25_dev_device_down(), the values and states changed by ax25_kill_by_device() will be reassigned. Finally, ax25_dev_device_down() will deallocate net_device. If we dereference net_device in syscall functions such as ax25_release(), ax25_sendmsg(), ax25_getsockopt(), ax25_getname() and ax25_info_show(), a UAF bug will occur. One of the possible race conditions is shown below: (USE) | (FREE) ax25_bind() | | ax25_kill_by_device() ax25_bind() | ax25_connect() | ... | ax25_dev_device_down() | ... | dev_put_track(dev, ...) //FREE ax25_release() | ... ax25_send_control() | alloc_skb() //USE | the corresponding fail log is shown below: BUG: KASAN: use-after-free in ax25_send_control+0x43/0x210 ... Call Trace: ... ax25_send_control+0x43/0x210 ax25_release+0x2db/0x3b0 __sock_release+0x6d/0x120 sock_close+0xf/0x20 __fput+0x11f/0x420 ... Allocated by task 1283: ... __kasan_kmalloc+0x81/0xa0 alloc_netdev_mqs+0x5a/0x680 mkiss_open+0x6c/0x380 tty_ldisc_open+0x55/0x90 ... Freed by task 1969: ... kfree+0xa3/0x2c0 device_release+0x54/0xe0 kobject_put+0xa5/0x120 tty_ldisc_kill+0x3e/0x80 ... In order to fix these UAF bugs caused by rebinding operation, this patch adds dev_hold_track() into ax25_bind() and corresponding dev_put_track() into ax25_kill_by_device(). Signed-off-by: Duoming Zhou Signed-off-by: David S. Miller conflict: net/ax25/af_ax25.c The commit in mainline use dev_{hold/put}track, which 4.19 has not introduced, so use dev{hold/put} instead. 
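Functionally the substitution noted above is equivalent: mainline's dev_hold_track()/dev_put_track() perform the same reference-count operations plus a reference-tracking cookie that 4.19 does not have. What the backport preserves is the strict pairing between the two sides, condensed here from the af_ax25.c hunks below:

        /* ax25_bind(): attaching to a device pins the underlying net_device,
         * in addition to the ax25_dev reference taken via ax25_addr_ax25dev() */
        if (ax25_dev) {
                ax25_fillin_cb(ax25, ax25_dev);
                dev_hold(ax25_dev->dev);
        }

        /* ax25_kill_by_device(): tearing the binding down drops both again */
        s->ax25_dev = NULL;
        dev_put(ax25_dev->dev);
        ax25_dev_put(ax25_dev);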
Signed-off-by: Lu Wei Reviewed-by: Yue Haibing Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/ax25/af_ax25.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 1fc954d34021..3769ffbe3302 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -102,6 +102,7 @@ static void ax25_kill_by_device(struct net_device *dev) lock_sock(sk); ax25_disconnect(s, ENETUNREACH); s->ax25_dev = NULL; + dev_put(ax25_dev->dev); ax25_dev_put(ax25_dev); release_sock(sk); spin_lock_bh(&ax25_list_lock); @@ -1123,8 +1124,10 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) } } - if (ax25_dev != NULL) + if (ax25_dev) { ax25_fillin_cb(ax25, ax25_dev); + dev_hold(ax25_dev->dev); + } done: ax25_cb_add(ax25); -- Gitee From 3b5be7185ac2f86e7f2fb6c3b01217a56b0edd81 Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 13 May 2022 02:20:40 +0000 Subject: [PATCH 326/340] ax25: Fix refcount leaks caused by ax25_cb_del() mainline inclusion from mainline-v5.18-rc1 commit 9fd75b66b8f68498454d685dc4ba13192ae069b0 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5783H CVE: CVE-2022-1204 -------------------------------- The previous commit d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") and commit feef318c855a ("ax25: fix UAF bugs of net_device caused by rebinding operation") increase the refcounts of ax25_dev and net_device in ax25_bind() and decrease the matching refcounts in ax25_kill_by_device() in order to prevent UAF bugs, but there are reference count leaks. The root cause of refcount leaks is shown below: (Thread 1) | (Thread 2) ax25_bind() | ... | ax25_addr_ax25dev() | ax25_dev_hold() //(1) | ... | dev_hold_track() //(2) | ... | ax25_destroy_socket() | ax25_cb_del() | ... | hlist_del_init() //(3) | | (Thread 3) | ax25_kill_by_device() | ... | ax25_for_each(s, &ax25_list) { | if (s->ax25_dev == ax25_dev) //(4) | ... | Firstly, we use ax25_bind() to increase the refcount of ax25_dev in position (1) and increase the refcount of net_device in position (2). Then, we use ax25_cb_del() invoked by ax25_destroy_socket() to delete ax25_cb in hlist in position (3) before calling ax25_kill_by_device(). Finally, the decrements of refcounts in ax25_kill_by_device() will not be executed, because no s->ax25_dev equals to ax25_dev in position (4). This patch adds decrements of refcounts in ax25_release() and use lock_sock() to do synchronization. If refcounts decrease in ax25_release(), the decrements of refcounts in ax25_kill_by_device() will not be executed and vice versa. Fixes: d01ffb9eee4a ("ax25: add refcount in ax25_dev to avoid UAF bugs") Fixes: 87563a043cef ("ax25: fix reference count leaks of ax25_dev") Fixes: feef318c855a ("ax25: fix UAF bugs of net_device caused by rebinding operation") Reported-by: Thomas Osterried Signed-off-by: Duoming Zhou Signed-off-by: David S. 
Miller conflict: net/ax25/af_ax25.c Signed-off-by: Lu Wei Reviewed-by: Yue Haibing Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/ax25/af_ax25.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 3769ffbe3302..3f79e21cb2de 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -102,8 +102,10 @@ static void ax25_kill_by_device(struct net_device *dev) lock_sock(sk); ax25_disconnect(s, ENETUNREACH); s->ax25_dev = NULL; - dev_put(ax25_dev->dev); - ax25_dev_put(ax25_dev); + if (sk->sk_socket) { + dev_put(ax25_dev->dev); + ax25_dev_put(ax25_dev); + } release_sock(sk); spin_lock_bh(&ax25_list_lock); sock_put(sk); @@ -979,14 +981,20 @@ static int ax25_release(struct socket *sock) { struct sock *sk = sock->sk; ax25_cb *ax25; + ax25_dev *ax25_dev; if (sk == NULL) return 0; sock_hold(sk); - sock_orphan(sk); lock_sock(sk); + sock_orphan(sk); ax25 = sk_to_ax25(sk); + ax25_dev = ax25->ax25_dev; + if (ax25_dev) { + dev_put(ax25_dev->dev); + ax25_dev_put(ax25_dev); + } if (sk->sk_type == SOCK_SEQPACKET) { switch (ax25->state) { -- Gitee From a1cbc83674b1528169c803031bb47dc12adc1a7a Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Fri, 13 May 2022 02:20:41 +0000 Subject: [PATCH 327/340] ax25: fix UAF bug in ax25_send_control() mainline inclusion from mainline-v5.18-rc1 commit 5352a761308397a0e6250fdc629bb3f615b94747 category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I5783H CVE: CVE-2022-1204 -------------------------------- There are UAF bugs in ax25_send_control(), when we call ax25_release() to deallocate ax25_dev. The possible race condition is shown below: (Thread 1) | (Thread 2) ax25_dev_device_up() //(1) | | ax25_kill_by_device() ax25_bind() //(2) | ax25_connect() | ... ax25->state = AX25_STATE_1 | ... | ax25_dev_device_down() //(3) (Thread 3) ax25_release() | ax25_dev_put() //(4) FREE | case AX25_STATE_1: | ax25_send_control() | alloc_skb() //USE | The refcount of ax25_dev increases in position (1) and (2), and decreases in position (3) and (4). The ax25_dev will be freed before dereference sites in ax25_send_control(). The following is part of the report: [ 102.297448] BUG: KASAN: use-after-free in ax25_send_control+0x33/0x210 [ 102.297448] Read of size 8 at addr ffff888009e6e408 by task ax25_close/602 [ 102.297448] Call Trace: [ 102.303751] ax25_send_control+0x33/0x210 [ 102.303751] ax25_release+0x356/0x450 [ 102.305431] __sock_release+0x6d/0x120 [ 102.305431] sock_close+0xf/0x20 [ 102.305431] __fput+0x11f/0x420 [ 102.305431] task_work_run+0x86/0xd0 [ 102.307130] get_signal+0x1075/0x1220 [ 102.308253] arch_do_signal_or_restart+0x1df/0xc00 [ 102.308253] exit_to_user_mode_prepare+0x150/0x1e0 [ 102.308253] syscall_exit_to_user_mode+0x19/0x50 [ 102.308253] do_syscall_64+0x48/0x90 [ 102.308253] entry_SYSCALL_64_after_hwframe+0x44/0xae [ 102.308253] RIP: 0033:0x405ae7 This patch defers the free operation of ax25_dev and net_device after all corresponding dereference sites in ax25_release() to avoid UAF. 
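The shape of the change is again pure ordering inside ax25_release(): the two reference drops introduced by the previous patch are moved below the last statement that can still touch ax25_dev->dev. A condensed sketch (the real function in the diff below handles every AX.25 state and SOCK_DGRAM sockets as well):

        ax25_dev = ax25->ax25_dev;      /* references from bind are still held */

        if (sk->sk_type == SOCK_SEQPACKET && ax25->state == AX25_STATE_1)
                /* may allocate an skb against ax25_dev->dev */
                ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND);

        /* only after the last possible dereference are the net_device and
         * ax25_dev references finally dropped */
        if (ax25_dev) {
                dev_put(ax25_dev->dev);
                ax25_dev_put(ax25_dev);
        }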
Fixes: 9fd75b66b8f6 ("ax25: Fix refcount leaks caused by ax25_cb_del()") Signed-off-by: Duoming Zhou Signed-off-by: Paolo Abeni Signed-off-by: Lu Wei Reviewed-by: Yue Haibing Reviewed-by: Wei Yongjun Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- net/ax25/af_ax25.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 3f79e21cb2de..1fab69daa968 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -991,10 +991,6 @@ static int ax25_release(struct socket *sock) sock_orphan(sk); ax25 = sk_to_ax25(sk); ax25_dev = ax25->ax25_dev; - if (ax25_dev) { - dev_put(ax25_dev->dev); - ax25_dev_put(ax25_dev); - } if (sk->sk_type == SOCK_SEQPACKET) { switch (ax25->state) { @@ -1056,6 +1052,10 @@ static int ax25_release(struct socket *sock) sk->sk_state_change(sk); ax25_destroy_socket(ax25); } + if (ax25_dev) { + dev_put(ax25_dev->dev); + ax25_dev_put(ax25_dev); + } sock->sk = NULL; release_sock(sk); -- Gitee From a5aba8243f662fb092a94b569794f619da398bbb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 13 May 2022 02:58:25 +0000 Subject: [PATCH 328/340] io_uring: fix race between timeout flush and removal stable inclusion from stable-v5.10.110 commit 2827328e646d0c2d3db1bfcad4b5f5016ce0d643 category: bugfix bugzilla: 186670,https://gitee.com/src-openeuler/kernel/issues/I54H78 CVE: CVE-2022-29582 -------------------------------- commit e677edbcabee849bfdd43f1602bccbecf736a646 upstream. io_flush_timeouts() assumes the timeout isn't in progress of triggering or being removed/canceled, so it unconditionally removes it from the timeout list and attempts to cancel it. Leave it on the list and let the normal timeout cancelation take care of it. Cc: stable@vger.kernel.org # 5.5+ Signed-off-by: Jens Axboe Signed-off-by: Greg Kroah-Hartman Signed-off-by: Guo Xuenan Conflicts: fs/io_uring.c Reviewed-by: Zhang Yi Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- fs/io_uring.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index a21d1d67d725..c104425b2557 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -1332,6 +1332,7 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx) static void io_flush_timeouts(struct io_ring_ctx *ctx) { + struct io_kiocb *req, *tmp; u32 seq; if (list_empty(&ctx->timeout_list)) @@ -1339,10 +1340,8 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx) seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts); - do { + list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { u32 events_needed, events_got; - struct io_kiocb *req = list_first_entry(&ctx->timeout_list, - struct io_kiocb, timeout.list); if (io_is_timeout_noseq(req)) break; @@ -1359,9 +1358,8 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx) if (events_got < events_needed) break; - list_del_init(&req->timeout.list); io_kill_timeout(req); - } while (!list_empty(&ctx->timeout_list)); + } ctx->cq_last_tm_flush = seq; } @@ -5213,6 +5211,7 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, else data->mode = HRTIMER_MODE_REL; + INIT_LIST_HEAD(&req->timeout.list); hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode); return 0; } @@ -6032,6 +6031,7 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer) prev = NULL; } + list_del(&req->timeout.list); spin_unlock_irqrestore(&ctx->completion_lock, flags); if (prev) { -- Gitee From 009c22bb86b3d6214e48d0e05a1648ebf62d8416 Mon Sep 17 00:00:00 2001 From: Yu 
Liao Date: Fri, 13 May 2022 08:34:34 +0000 Subject: [PATCH 329/340] mm: clear_freelist_page: Provide timeout mechanism for worker runtime hulk inclusion category: feature bugzilla: https://gitee.com/openeuler/kernel/issues/I558LD CVE: NA -------------------------------- In the case of large memory, the clear freelist will hold zone lock for a long time. As a result, the process may be blocked unless clear freelist thread exit, and causing the system to be reset by the watchdog. Provide a mechanism to stop clear freelist threads when elapsed time exceeds cfp_timeout, which can be set by module_param(). Signed-off-by: Yu Liao Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/clear_freelist_page.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mm/clear_freelist_page.c b/mm/clear_freelist_page.c index f0a13b8fdcdb..1ea59a3e527f 100644 --- a/mm/clear_freelist_page.c +++ b/mm/clear_freelist_page.c @@ -13,8 +13,10 @@ #include #include #include +#include #include +#define CFP_DEFAULT_TIMEOUT 2000 #define for_each_populated_zone_pgdat(pgdat, zone) \ for (zone = pgdat->node_zones; \ zone; \ @@ -32,6 +34,7 @@ static DECLARE_WAIT_QUEUE_HEAD(clear_freelist_wait); static DEFINE_MUTEX(clear_freelist_lock); static atomic_t clear_freelist_workers; static atomic_t clear_pages_num; +static ulong cfp_timeout_ms = CFP_DEFAULT_TIMEOUT; static int one = 1; /* @@ -51,15 +54,25 @@ static struct zone *next_pgdat_zone(struct zone *zone) static void clear_pgdat_freelist_pages(struct work_struct *work) { struct pgdat_entry *entry = container_of(work, struct pgdat_entry, work); + u64 cfp_timeout_ns = cfp_timeout_ms * NSEC_PER_MSEC; struct pglist_data *pgdat = entry->pgdat; unsigned long flags, order, t; struct page *page; struct zone *zone; + u64 start, now; + + start = sched_clock(); for_each_populated_zone_pgdat(pgdat, zone) { spin_lock_irqsave(&zone->lock, flags); for_each_migratetype_order(order, t) { list_for_each_entry(page, &zone->free_area[order].free_list[t], lru) { + now = sched_clock(); + if (unlikely(now - start > cfp_timeout_ns)) { + spin_unlock_irqrestore(&zone->lock, flags); + goto out; + } + #ifdef CONFIG_KMAP_LOCAL int i; @@ -77,6 +90,8 @@ static void clear_pgdat_freelist_pages(struct work_struct *work) cond_resched(); } + +out: kfree(entry); if (atomic_dec_and_test(&clear_freelist_workers)) @@ -170,3 +185,4 @@ static int __init clear_freelist_init(void) return 0; } module_init(clear_freelist_init); +module_param(cfp_timeout_ms, ulong, 0644); -- Gitee From da51b55eeff9ee6de1ad9724ed0f0f17ebc00aa9 Mon Sep 17 00:00:00 2001 From: Zhou Guanghui Date: Fri, 13 May 2022 08:34:35 +0000 Subject: [PATCH 330/340] mm/share_pool: Support read-only memory allocation ascend inclusion category: Bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I578LV CVE: NA -------------------------------- When the driver uses the shared pool memory to share the memory with the user space, the user space is not allowed to operate this area. This prevents users from damaging sensitive data. When the sp_alloc and k2u processes apply for private memory, read-only memory can be applied for. 
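A rough userspace analogue of what SP_PROT_RO gives the consumer side is sketched below: the producer maps the shared object read-write while the consumer maps it through a second, read-only descriptor. The /sp_ro_demo name and the program itself are illustrative assumptions, not the share_pool interface.

  #include <fcntl.h>
  #include <stdio.h>
  #include <string.h>
  #include <sys/mman.h>
  #include <sys/stat.h>
  #include <unistd.h>

  int main(void)
  {
          const char *name = "/sp_ro_demo";       /* illustrative name */
          int fd_rw = shm_open(name, O_CREAT | O_RDWR, 0600);
          int fd_ro = shm_open(name, O_RDONLY, 0);

          if (fd_rw < 0 || fd_ro < 0 || ftruncate(fd_rw, 4096) < 0)
                  return 1;

          char *rw = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd_rw, 0);
          char *ro = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd_ro, 0);

          if (rw == MAP_FAILED || ro == MAP_FAILED)
                  return 1;

          strcpy(rw, "sensitive data");            /* producer writes */
          printf("read-only view sees: %s\n", ro); /* consumer may read ... */

          /* ... but cannot make its view writable: mprotect() is refused. */
          if (mprotect(ro, 4096, PROT_READ | PROT_WRITE) != 0)
                  perror("mprotect on read-only mapping");

          shm_unlink(name);
          return 0;
  }

Because the read-only mapping was never writable, the mprotect() upgrade attempt fails; clearing VM_MAYWRITE on the vma in the patch gives the consumer of an SP_PROT_RO area the same guarantee.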
Signed-off-by: Zhou Guanghui Reviewed-by: Weilong Chen Signed-off-by: Yongqiang Liu --- include/linux/share_pool.h | 3 ++- mm/share_pool.c | 18 ++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/include/linux/share_pool.h b/include/linux/share_pool.h index 669d32f9a092..ba001069fbd2 100644 --- a/include/linux/share_pool.h +++ b/include/linux/share_pool.h @@ -13,6 +13,7 @@ #define SP_HUGEPAGE_ONLY (1 << 1) #define SP_DVPP (1 << 2) #define SP_SPEC_NODE_ID (1 << 3) +#define SP_PROT_RO (1 << 16) #define DEVICE_ID_BITS 4UL #define DEVICE_ID_MASK ((1UL << DEVICE_ID_BITS) - 1UL) @@ -22,7 +23,7 @@ #define NODE_ID_SHIFT (DEVICE_ID_SHIFT + DEVICE_ID_BITS) #define SP_FLAG_MASK (SP_HUGEPAGE | SP_HUGEPAGE_ONLY | SP_DVPP | \ - SP_SPEC_NODE_ID | \ + SP_SPEC_NODE_ID | SP_PROT_RO | \ (DEVICE_ID_MASK << DEVICE_ID_SHIFT) | \ (NODE_ID_MASK << NODE_ID_SHIFT)) diff --git a/mm/share_pool.c b/mm/share_pool.c index 833ecb7dd859..5ba353ddfabd 100644 --- a/mm/share_pool.c +++ b/mm/share_pool.c @@ -2349,6 +2349,9 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, if (spg_node) prot = spg_node->prot; + if (ac->sp_flags & SP_PROT_RO) + prot = PROT_READ; + /* when success, mmap_addr == spa->va_start */ mmap_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); if (IS_ERR_VALUE(mmap_addr)) { @@ -2373,6 +2376,10 @@ static int sp_alloc_mmap(struct mm_struct *mm, struct sp_area *spa, ret = -EINVAL; goto unmap; } + + if (ac->sp_flags & SP_PROT_RO) + vma->vm_flags &= ~VM_MAYWRITE; + /* clean PTE_RDONLY flags or trigger SMMU event */ if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); @@ -2666,6 +2673,9 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, goto put_mm; } + if (kc && kc->sp_flags & SP_PROT_RO) + prot = PROT_READ; + ret_addr = sp_mmap(mm, spa_file(spa), spa, &populate, prot); if (IS_ERR_VALUE(ret_addr)) { pr_debug("k2u mmap failed %lx\n", ret_addr); @@ -2678,6 +2688,9 @@ static unsigned long sp_remap_kva_to_vma(unsigned long kva, struct sp_area *spa, if (prot & PROT_WRITE) vma->vm_page_prot = __pgprot(((~PTE_RDONLY) & vma->vm_page_prot.pgprot) | PTE_DIRTY); + if (kc && kc->sp_flags & SP_PROT_RO) + vma->vm_flags &= ~VM_MAYWRITE; + if (is_vm_hugetlb_page(vma)) { ret = remap_vmalloc_hugepage_range(vma, (void *)kva, 0); if (ret) { @@ -2729,6 +2742,7 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un struct sp_area *spa; struct spg_proc_stat *stat; unsigned long prot = PROT_READ | PROT_WRITE; + struct sp_k2u_context kc; down_write(&sp_group_sem); stat = sp_init_process_stat(current, current->mm, spg_none); @@ -2747,8 +2761,8 @@ static void *sp_make_share_kva_to_task(unsigned long kva, unsigned long size, un } spa->kva = kva; - - uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, NULL); + kc.sp_flags = sp_flags; + uva = (void *)sp_remap_kva_to_vma(kva, spa, current->mm, prot, &kc); __sp_area_drop(spa); if (IS_ERR(uva)) pr_err("remap k2u to task failed %ld\n", PTR_ERR(uva)); -- Gitee From fec3e16daebaabd2818c54f3405ce8923b2e0f3b Mon Sep 17 00:00:00 2001 From: Luo Jiaxing Date: Fri, 13 May 2022 08:34:36 +0000 Subject: [PATCH 331/340] scsi: hisi_sas: Don't fail IT nexus reset for Open Reject timeout mainline inclusion from mainline-v5.2-rc1 commit 246ea3c0ad02 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I538BF CVE: NA --------------------------------------------------------- Currently we call 
hisi_sas_softreset_ata_disk() in hisi_sas_I_T_nexus_reset(). If this fails for open reject reason, there is no reason to fail the IT nexus reset, so only fail for TMF_RESP_FUNC_FAILED. Some other strings spilled over multiple lines are reunited. Signed-off-by: Luo Jiaxing Signed-off-by: John Garry Signed-off-by: Martin K. Petersen Signed-off-by: Xingui Yang Reviewed-by: kang fenglong Signed-off-by: Yongqiang Liu --- drivers/scsi/hisi_sas/hisi_sas_main.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 32f55248c356..6ee5168a6bd8 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1277,8 +1277,7 @@ static int hisi_sas_exec_internal_tmf_task(struct domain_device *device, /* no error, but return the number of bytes of * underrun */ - dev_warn(dev, "abort tmf: task to dev %016llx " - "resp: 0x%x sts 0x%x underrun\n", + dev_warn(dev, "abort tmf: task to dev %016llx resp: 0x%x sts 0x%x underrun\n", SAS_ADDR(device->sas_addr), task->task_status.resp, task->task_status.stat); @@ -1293,10 +1292,16 @@ static int hisi_sas_exec_internal_tmf_task(struct domain_device *device, break; } - dev_warn(dev, "abort tmf: task to dev " - "%016llx resp: 0x%x status 0x%x\n", - SAS_ADDR(device->sas_addr), task->task_status.resp, - task->task_status.stat); + if (task->task_status.resp == SAS_TASK_COMPLETE && + task->task_status.stat == SAS_OPEN_REJECT) { + dev_warn(dev, "abort tmf: open reject failed\n"); + res = -EIO; + } else { + dev_warn(dev, "abort tmf: task to dev %016llx resp: 0x%x status 0x%x\n", + SAS_ADDR(device->sas_addr), + task->task_status.resp, + task->task_status.stat); + } sas_free_task(task); task = NULL; } @@ -1840,7 +1845,8 @@ static int hisi_sas_I_T_nexus_reset(struct domain_device *device) if (dev_is_sata(device)) { rc = hisi_sas_softreset_ata_disk(device); if (rc) - return TMF_RESP_FUNC_FAILED; + dev_err(dev, "I_T nexus reset: softreset failed (%d)\n", + rc); } rc = hisi_sas_debug_I_T_nexus_reset(device); @@ -2124,10 +2130,8 @@ _hisi_sas_internal_task_abort(struct hisi_hba *hisi_hba, } exit: - dev_dbg(dev, "internal task abort: task to dev %016llx task=%pK " - "resp: 0x%x sts 0x%x\n", - SAS_ADDR(device->sas_addr), - task, + dev_dbg(dev, "internal task abort: task to dev %016llx task=%p resp: 0x%x sts 0x%x\n", + SAS_ADDR(device->sas_addr), task, task->task_status.resp, /* 0 is complete, -1 is undelivered */ task->task_status.stat); sas_free_task(task); -- Gitee From 8fdd6417b411b8614315fa191de49775a35aa056 Mon Sep 17 00:00:00 2001 From: Xingui Yang Date: Fri, 13 May 2022 08:34:37 +0000 Subject: [PATCH 332/340] scsi: hisi_sas: Fix SAS disk sense info print incorrectly sometimes driver inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4ZO9V CVE: NA ---------------------------------- Sometimes disk response sense info, but driver print data underflow without sense, and it's not correct. we use scsi_normalize_sense instead of hisi_sas_get_sense_data to parse the sense info. before: data underflow without sense, rsp_code:0xf0, rc:-2. after: data underflow, rsp_code:0x70, sensekey:0x5, ASC:0x21, ASCQ:0x0. 
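For reference, the sketch below shows roughly what scsi_normalize_sense() extracts from a sense buffer for the fixed (0x70/0x71) and descriptor (0x72/0x73) formats; the sense_hdr struct, the normalize_sense() helper and the sample buffer are illustrative only, not the kernel implementation.

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  struct sense_hdr {
          uint8_t response_code;
          uint8_t sense_key;
          uint8_t asc;
          uint8_t ascq;
  };

  static bool normalize_sense(const uint8_t *sb, int len, struct sense_hdr *h)
  {
          if (len < 1)
                  return false;

          h->response_code = sb[0] & 0x7f;
          if (h->response_code >= 0x72) {          /* descriptor format */
                  if (len < 4)
                          return false;
                  h->sense_key = sb[1] & 0x0f;
                  h->asc  = sb[2];
                  h->ascq = sb[3];
          } else {                                 /* fixed format */
                  if (len < 14)
                          return false;
                  h->sense_key = sb[2] & 0x0f;
                  h->asc  = sb[12];
                  h->ascq = sb[13];
          }
          return true;
  }

  int main(void)
  {
          /* Fixed-format sense: ILLEGAL REQUEST with ASC 0x21/ASCQ 0x00,
           * chosen to match the "after" log line above. */
          uint8_t sense[18] = { 0x70, 0, 0x05, 0, 0, 0, 0, 10,
                                0, 0, 0, 0, 0x21, 0x00 };
          struct sense_hdr h;

          if (normalize_sense(sense, sizeof(sense), &h))
                  printf("rsp_code:0x%x, sensekey:0x%x, ASC:0x%x, ASCQ:0x%x\n",
                         h.response_code, h.sense_key, h.asc, h.ascq);
          return 0;
  }

Run on the sample buffer it prints rsp_code:0x70, sensekey:0x5, ASC:0x21, ASCQ:0x0, matching the "after" line quoted above.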
Signed-off-by: Xingui Yang Reviewed-by: kang fenglong Signed-off-by: Yongqiang Liu --- drivers/scsi/hisi_sas/hisi_sas.h | 7 +-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 72 ++++++++------------------ 2 files changed, 23 insertions(+), 56 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index 2a70af0191af..f3bb10da8f86 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -572,12 +573,6 @@ struct hisi_sas_slot_dif_buf_table { struct hisi_sas_sge_dif_page sge_dif_page; }; -struct hisi_sas_sense_data { - int sense_key; - int add_sense_code; - int add_sense_code_qua; -}; - extern bool hisi_sas_debugfs_enable; extern struct dentry *hisi_sas_debugfs_dir; extern int skip_bus_flag; diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index 3c662fa5f6c0..a268a2ec686c 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -2291,56 +2291,24 @@ slot_err_v3_hw(struct hisi_hba *hisi_hba, struct sas_task *task, } } -static int hisi_sas_get_sense_data(struct ssp_response_iu *iu, - struct hisi_sas_sense_data *sense_data) -{ - int rc = 0; - - switch (iu->resp_data[0]) { - case 0x70: - case 0x71: { - if (iu->sense_data_len > 13) { - sense_data->sense_key = (iu->resp_data[2] & 0xF); - sense_data->add_sense_code = iu->resp_data[12]; - sense_data->add_sense_code_qua = iu->resp_data[13]; - } else - rc = -1; - break; - } - case 0x72: - case 0x73: { - if (iu->sense_data_len > 4) { - sense_data->sense_key = (iu->resp_data[1] & 0xF); - sense_data->add_sense_code = iu->resp_data[2]; - sense_data->add_sense_code_qua = iu->resp_data[3]; - } else - rc = -1; - break; - } - default: - rc = -2; - break; - } - - return rc; -} - static int ssp_need_spin_up(struct hisi_sas_slot *slot) { - int rc; - struct hisi_sas_sense_data sense_data; + bool rc; + int sb_len; + u8 *sense_buffer; + struct scsi_sense_hdr sshdr; struct ssp_response_iu *iu = hisi_sas_status_buf_addr_mem(slot) + sizeof(struct hisi_sas_err_record); - rc = hisi_sas_get_sense_data(iu, &sense_data); + sb_len = iu->sense_data_len; + sense_buffer = iu->sense_data; + rc = scsi_normalize_sense(sense_buffer, sb_len, &sshdr); /* * if the SAS disk response with ASC=04h, * ASCQ=11h, host should send NOTIFY primitive. 
*/ - if (rc == 0 && - sense_data.add_sense_code == 0x4 && - sense_data.add_sense_code_qua == 0x11) + if (rc && sshdr.asc == 0x4 && sshdr.ascq == 0x11) return 1; return 0; @@ -2444,22 +2412,26 @@ slot_complete_v3_hw(struct hisi_hba *hisi_hba, struct hisi_sas_slot *slot) if ((error_info[3] & RX_DATA_LEN_UNDERFLOW_MSK) && (task->task_proto == SAS_PROTOCOL_SSP)) { /*print detail sense info when data underflow happened*/ - int rc; - struct hisi_sas_sense_data sense_data; + bool rc; + int sb_len; + u8 *sense_buffer; + struct scsi_sense_hdr sshdr; struct ssp_response_iu *iu = hisi_sas_status_buf_addr_mem(slot) + sizeof(struct hisi_sas_err_record); - rc = hisi_sas_get_sense_data(iu, &sense_data); - if (!rc) + sb_len = iu->sense_data_len; + sense_buffer = iu->sense_data; + rc = scsi_normalize_sense(sense_buffer, sb_len, &sshdr); + if (rc) dev_info(dev, "data underflow, rsp_code:0x%x, sensekey:0x%x, ASC:0x%x, ASCQ:0x%x.\n", - iu->resp_data[0], - sense_data.sense_key, - sense_data.add_sense_code, - sense_data.add_sense_code_qua); + sshdr.response_code, + sshdr.sense_key, + sshdr.asc, + sshdr.ascq); else - dev_info(dev, "data underflow without sense, rsp_code:0x%x, rc:%d.\n", - iu->resp_data[0], rc); + dev_info(dev, "data underflow without sense, rsp_code:0x%02x.\n", + iu->resp_data[0]); } if (unlikely(slot->abort)) { sas_task_abort(task); -- Gitee From 1bc9fed394dc8160191178e3250af71518ca073b Mon Sep 17 00:00:00 2001 From: Xiang Chen Date: Fri, 13 May 2022 08:34:38 +0000 Subject: [PATCH 333/340] scsi: hisi_sas: Change hisi_sas_control_phy() phyup timeout mainline inclusion from mainline-5.17-rc1 commit 512623de5239 category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I4UT5N CVE: NA ------------------------------------------- The time of phyup not only depends on the controller but also the type of disk connected. As an example, from experience, for some SATA disks the amount of time from reset/power-on to receive the D2H FIS for phyup can take upto and more than 10s sometimes. According to the specification of some SATA disks such as ST14000NM0018, the max time from power-on to ready is 30s. Based on this the current timeout of phyup at 2s which is not enough. So set the value as HISI_SAS_WAIT_PHYUP_TIMEOUT (30s) in hisi_sas_control_phy(). For v3 hw there is a pre-existing workaround for a HW bug, being that we issue a link reset when the OOB occurs but the phyup does not. The current phyup timeout is HISI_SAS_WAIT_PHYUP_TIMEOUT. So if this does occur from when issuing a phy enable or similar via hisi_sas_control_phy(), the subsequent HW workaround linkreset processing calls hisi_sas_control_phy(), but this will pend the original phy reset timing out, so it is safe. Link: https://lore.kernel.org/r/1645703489-87194-3-git-send-email-john.garry@huawei.com Signed-off-by: Xiang Chen Signed-off-by: John Garry Signed-off-by: Martin K. 
Petersen Signed-off-by: Xingui Yang conflict: drivers/scsi/hisi_sas/hisi_sas.h drivers/scsi/hisi_sas/hisi_sas_main.c Reviewed-by: Xingui Yang Reviewed-by: kang fenglong Signed-off-by: Yongqiang Liu --- drivers/scsi/hisi_sas/hisi_sas.h | 1 + drivers/scsi/hisi_sas/hisi_sas_main.c | 8 ++++++-- drivers/scsi/hisi_sas/hisi_sas_v2_hw.c | 3 +-- drivers/scsi/hisi_sas/hisi_sas_v3_hw.c | 3 +-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/hisi_sas/hisi_sas.h b/drivers/scsi/hisi_sas/hisi_sas.h index f3bb10da8f86..a791401be7b3 100644 --- a/drivers/scsi/hisi_sas/hisi_sas.h +++ b/drivers/scsi/hisi_sas/hisi_sas.h @@ -88,6 +88,7 @@ #define HISI_SAS_PROT_MASK (HISI_SAS_DIF_PROT_MASK | HISI_SAS_DIX_PROT_MASK) +#define HISI_SAS_WAIT_PHYUP_TIMEOUT (30 * HZ) #define CLEAR_ITCT_TIMEOUT 20 struct hisi_hba; diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c b/drivers/scsi/hisi_sas/hisi_sas_main.c index 6ee5168a6bd8..93df1c6b94c7 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_main.c +++ b/drivers/scsi/hisi_sas/hisi_sas_main.c @@ -1783,6 +1783,7 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device) int rc, reset_type = (sas_dev->dev_status == HISI_SAS_DEV_INIT || !dev_is_sata(device)) ? true : false; struct hisi_hba *hisi_hba = dev_to_hisi_hba(device); + struct device *dev = hisi_hba->dev; struct sas_ha_struct *sas_ha = &hisi_hba->sha; DECLARE_COMPLETION_ONSTACK(phyreset); @@ -1809,7 +1810,8 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device) struct hisi_sas_phy *phy = container_of(sas_phy, struct hisi_sas_phy, sas_phy); /* Wait for I_T reset complete, time out after 2s */ - int ret = wait_for_completion_timeout(&phyreset, 2 * HZ); + int ret = wait_for_completion_timeout(&phyreset, + HISI_SAS_WAIT_PHYUP_TIMEOUT); unsigned long flags; spin_lock_irqsave(&phy->lock, flags); @@ -1818,8 +1820,10 @@ static int hisi_sas_debug_I_T_nexus_reset(struct domain_device *device) spin_unlock_irqrestore(&phy->lock, flags); /* report PHY down if timed out */ - if (!ret) + if (!ret) { + dev_warn(dev, "phy%d reset timeout\n", sas_phy->id); hisi_sas_phy_down(hisi_hba, sas_phy->id, 0); + } } else if (sas_dev->dev_status != HISI_SAS_DEV_INIT) /* Sleep 2s, wait for I_T reset at expander env except fail */ if (!rc) diff --git a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c index 61ff065e5d9b..e6e489778014 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v2_hw.c @@ -2874,7 +2874,6 @@ static const struct hisi_sas_hw_error port_ecc_axi_error[] = { }, }; -#define WAIT_PHYUP_TIMEOUT_V2_HW 20 static void wait_phyup_timedout_v2_hw(struct timer_list *t) { struct hisi_sas_phy *phy = from_timer(phy, t, timer); @@ -2894,7 +2893,7 @@ static void phy_oob_ready_v2_hw(struct hisi_hba *hisi_hba, int phy_no) if (!timer_pending(&phy->timer)) { dev_dbg(dev, "phy%d OOB ready\n", phy_no); phy->timer.function = wait_phyup_timedout_v2_hw; - phy->timer.expires = jiffies + WAIT_PHYUP_TIMEOUT_V2_HW * HZ; + phy->timer.expires = jiffies + HISI_SAS_WAIT_PHYUP_TIMEOUT; add_timer(&phy->timer); } } diff --git a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c index a268a2ec686c..baae04adc8e4 100644 --- a/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c +++ b/drivers/scsi/hisi_sas/hisi_sas_v3_hw.c @@ -1890,7 +1890,6 @@ static void handle_chl_int2_v3_hw(struct hisi_hba *hisi_hba, int phy_no) hisi_sas_phy_write32(hisi_hba, phy_no, CHL_INT2, irq_value); } -#define WAIT_PHYUP_TIMEOUT_V3_HW 20 static void 
wait_phyup_timedout_v3_hw(struct timer_list *t) { struct hisi_sas_phy *phy = from_timer(phy, t, timer); @@ -1917,7 +1916,7 @@ static void handle_chl_int0_v3_hw(struct hisi_hba *hisi_hba, int phy_no) if (!timer_pending(&phy->timer)) { phy->timer.function = wait_phyup_timedout_v3_hw; phy->timer.expires = jiffies + - WAIT_PHYUP_TIMEOUT_V3_HW * HZ; + HISI_SAS_WAIT_PHYUP_TIMEOUT; add_timer(&phy->timer); } } -- Gitee From f0993522412d567c96241f1dc7d9f2bd03d501f0 Mon Sep 17 00:00:00 2001 From: Liu Shixin Date: Tue, 17 May 2022 07:38:49 +0000 Subject: [PATCH 334/340] mm/memory.c: update the first page in clear_gigantic_page_chunk hulk inclusion category: bugfix bugzilla: https://gitee.com/openeuler/kernel/issues/I57SM0 CVE: NA -------------------------------- Patch "mm: parallelize clear_gigantic_page" make clear_gigantic_page to be parallelized. But forgot to update the first page which results the first page for each block is still the head page. Fix it by calculating the first page for each block. By the way, add a check to pointer p in order to prevent kernel panic. Fixes: ae0cd4d46ced ("mm: parallelize clear_gigantic_page") Signed-off-by: Liu Shixin Reviewed-by: Kefeng Wang Signed-off-by: Yongqiang Liu --- mm/memory.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index 98c2872f8415..8be034c0f10b 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4721,14 +4721,15 @@ static int clear_gigantic_page_chunk(unsigned long start, unsigned long end, struct cgp_args *args) { struct page *base_page = args->base_page; - struct page *p = base_page; + struct page *p = mem_map_offset(base_page, start); unsigned long addr = args->addr; unsigned long i; might_sleep(); for (i = start; i < end; i++, p = mem_map_next(p, base_page, i)) { cond_resched(); - clear_user_highpage(p, addr + i * PAGE_SIZE); + if (p) + clear_user_highpage(p, addr + i * PAGE_SIZE); } return KTASK_RETURN_SUCCESS; -- Gitee From 1b42f1e2dc66c60fa5146652a7a67b2121046edf Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Tue, 17 May 2022 07:38:50 +0000 Subject: [PATCH 335/340] drm/vgem: Close use-after-free race in vgem_gem_create stable inclusion from linux-4.19.242 commit df2c1f38939aabb8c6beca108f08b90f050b9ebc category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I574Y3 CVE: CVE-2022-1419 -------------------------------- commit 4b848f20eda5974020f043ca14bacf7a7e634fc8 upstream. There's two references floating around here (for the object reference, not the handle_count reference, that's a different thing): - The temporary reference held by vgem_gem_create, acquired by creating the object and released by calling drm_gem_object_put_unlocked. - The reference held by the object handle, created by drm_gem_handle_create. This one generally outlives the function, except if a 2nd thread races with a GEM_CLOSE ioctl call. So usually everything is correct, except in that race case, where the access to gem_object->size could be looking at freed data already. Which again isn't a real problem (userspace shot its feet off already with the race, we could return garbage), but maybe someone can exploit this as an information leak. 
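The safe pattern the fix follows can be reduced to a small standalone sketch: copy whatever the caller needs while still holding the temporary reference, drop that reference, then use only the copy. The demo_obj type and demo_* functions below are invented for illustration and are not the DRM GEM API.

  #include <stdatomic.h>
  #include <stdio.h>
  #include <stdlib.h>

  struct demo_obj {
          atomic_int refcnt;
          unsigned long long size;
  };

  static void demo_obj_put(struct demo_obj *o)
  {
          if (atomic_fetch_sub(&o->refcnt, 1) == 1)
                  free(o);
  }

  /*
   * Snapshot the size into the reply while the object is still pinned by
   * our reference, drop the reference, and from then on only touch the
   * copy.  A racing holder of the handle reference may free the object at
   * any point after our put.
   */
  static int demo_dumb_create(struct demo_obj *o, unsigned long long *out_size)
  {
          *out_size = o->size;            /* copy while still referenced */
          demo_obj_put(o);                /* drop the temporary reference */
          printf("Created object of size %llu\n", *out_size);
          return 0;
  }

  int main(void)
  {
          struct demo_obj *o = malloc(sizeof(*o));
          unsigned long long size;

          if (!o)
                  return 1;
          atomic_init(&o->refcnt, 1);     /* only the temporary reference */
          o->size = 4096;
          demo_dumb_create(o, &size);
          return 0;
  }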
Cc: Dan Carpenter Cc: Hillf Danton Reported-by: syzbot+0dc4444774d419e916c8@syzkaller.appspotmail.com Cc: stable@vger.kernel.org Cc: Emil Velikov Cc: Daniel Vetter Cc: Sean Paul Cc: Chris Wilson Cc: Eric Anholt Cc: Sam Ravnborg Cc: Rob Clark Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20200202132133.1891846-1-daniel.vetter@ffwll.ch [OP: backport to 4.19: adjusted DRM_DEBUG() -> DRM_DEBUG_DRIVER()] Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman Signed-off-by: Yongqiang Liu Reviewed-by: Zhang Xiaoxu Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/gpu/drm/vgem/vgem_drv.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c index 4709f08f39e4..02f518837978 100644 --- a/drivers/gpu/drm/vgem/vgem_drv.c +++ b/drivers/gpu/drm/vgem/vgem_drv.c @@ -189,9 +189,10 @@ static struct drm_gem_object *vgem_gem_create(struct drm_device *dev, return ERR_CAST(obj); ret = drm_gem_handle_create(file, &obj->base, handle); - drm_gem_object_put_unlocked(&obj->base); - if (ret) + if (ret) { + drm_gem_object_put_unlocked(&obj->base); return ERR_PTR(ret); + } return &obj->base; } @@ -214,7 +215,9 @@ static int vgem_gem_dumb_create(struct drm_file *file, struct drm_device *dev, args->size = gem_object->size; args->pitch = pitch; - DRM_DEBUG_DRIVER("Created object of size %lld\n", size); + drm_gem_object_put_unlocked(gem_object); + + DRM_DEBUG_DRIVER("Created object of size %llu\n", args->size); return 0; } -- Gitee From 151284c8e9385a46bb01d69fd7484a7e552bce52 Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 17 May 2022 11:35:55 +0000 Subject: [PATCH 336/340] ptrace: Check PTRACE_O_SUSPEND_SECCOMP permission on PTRACE_SEIZE stable inclusion from linux-4.19.238 commit b1f438f872dcda10a79e6aeaf06fd52dfb15a6ab category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I57IYD CVE: CVE-2022-30594 -------------------------------- commit ee1fee900537b5d9560e9f937402de5ddc8412f3 upstream. Setting PTRACE_O_SUSPEND_SECCOMP is supposed to be a highly privileged operation because it allows the tracee to completely bypass all seccomp filters on kernels with CONFIG_CHECKPOINT_RESTORE=y. It is only supposed to be settable by a process with global CAP_SYS_ADMIN, and only if that process is not subject to any seccomp filters at all. However, while these permission checks were done on the PTRACE_SETOPTIONS path, they were missing on the PTRACE_SEIZE path, which also sets user-specified ptrace flags. Move the permissions checks out into a helper function and let both ptrace_attach() and ptrace_setoptions() call it. Cc: stable@kernel.org Fixes: 13c4a90119d2 ("seccomp: add ptrace options for suspend/resume") Signed-off-by: Jann Horn Link: https://lkml.kernel.org/r/20220319010838.1386861-1-jannh@google.com Signed-off-by: Eric W. 
Biederman Signed-off-by: Greg Kroah-Hartman Signed-off-by: Li Huafei Reviewed-by: Xu Kuohai Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- kernel/ptrace.c | 47 ++++++++++++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 15 deletions(-) diff --git a/kernel/ptrace.c b/kernel/ptrace.c index f32095d7aa5e..424a04cdda1d 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -365,6 +365,26 @@ bool ptrace_may_access(struct task_struct *task, unsigned int mode) return !err; } +static int check_ptrace_options(unsigned long data) +{ + if (data & ~(unsigned long)PTRACE_O_MASK) + return -EINVAL; + + if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { + if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || + !IS_ENABLED(CONFIG_SECCOMP)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (seccomp_mode(¤t->seccomp) != SECCOMP_MODE_DISABLED || + current->ptrace & PT_SUSPEND_SECCOMP) + return -EPERM; + } + return 0; +} + static int ptrace_attach(struct task_struct *task, long request, unsigned long addr, unsigned long flags) @@ -376,8 +396,16 @@ static int ptrace_attach(struct task_struct *task, long request, if (seize) { if (addr != 0) goto out; + /* + * This duplicates the check in check_ptrace_options() because + * ptrace_attach() and ptrace_setoptions() have historically + * used different error codes for unknown ptrace options. + */ if (flags & ~(unsigned long)PTRACE_O_MASK) goto out; + retval = check_ptrace_options(flags); + if (retval) + return retval; flags = PT_PTRACED | PT_SEIZED | (flags << PT_OPT_FLAG_SHIFT); } else { flags = PT_PTRACED; @@ -650,22 +678,11 @@ int ptrace_writedata(struct task_struct *tsk, char __user *src, unsigned long ds static int ptrace_setoptions(struct task_struct *child, unsigned long data) { unsigned flags; + int ret; - if (data & ~(unsigned long)PTRACE_O_MASK) - return -EINVAL; - - if (unlikely(data & PTRACE_O_SUSPEND_SECCOMP)) { - if (!IS_ENABLED(CONFIG_CHECKPOINT_RESTORE) || - !IS_ENABLED(CONFIG_SECCOMP)) - return -EINVAL; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (seccomp_mode(¤t->seccomp) != SECCOMP_MODE_DISABLED || - current->ptrace & PT_SUSPEND_SECCOMP) - return -EPERM; - } + ret = check_ptrace_options(data); + if (ret) + return ret; /* Avoid intermediate state when all opts are cleared */ flags = child->ptrace; -- Gitee From 7734aa05f292e4bb8b79d8617aa85db4fa4d0d1a Mon Sep 17 00:00:00 2001 From: Duoming Zhou Date: Tue, 17 May 2022 11:35:56 +0000 Subject: [PATCH 337/340] ax25: Fix UAF bugs in ax25 timers stable inclusion from stable-v4.19.240 commit 3082f32c45465b692c314131c2a3657e0c23e09d bugzilla: 186594, https://gitee.com/src-openeuler/kernel/issues/I578X1 CVE: CVE-2022-1205 -------------------------------- commit 82e31755e55fbcea6a9dfaae5fe4860ade17cbc0 upstream. There are race conditions that may lead to UAF bugs in ax25_heartbeat_expiry(), ax25_t1timer_expiry(), ax25_t2timer_expiry(), ax25_t3timer_expiry() and ax25_idletimer_expiry(), when we call ax25_release() to deallocate ax25_dev. One of the UAF bugs caused by ax25_release() is shown below: (Thread 1) | (Thread 2) ax25_dev_device_up() //(1) | ... | ax25_kill_by_device() ax25_bind() //(2) | ax25_connect() | ... ax25_std_establish_data_link() | ax25_start_t1timer() | ax25_dev_device_down() //(3) mod_timer(&ax25->t1timer,..) | | ax25_release() (wait a time) | ... | ax25_dev_put(ax25_dev) //(4)FREE ax25_t1timer_expiry() | ax25->ax25_dev->values[..] //USE| ... ... 
| We increase the refcount of ax25_dev in position (1) and (2), and decrease the refcount of ax25_dev in position (3) and (4). The ax25_dev will be freed in position (4) and be used in ax25_t1timer_expiry(). The fail log is shown below: [ 106.116942] BUG: KASAN: use-after-free in ax25_t1timer_expiry+0x1c/0x60 [ 106.116942] Read of size 8 at addr ffff88800bda9028 by task swapper/0/0 [ 106.116942] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.17.0-06123-g0905eec574 [ 106.116942] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-14 [ 106.116942] Call Trace: ... [ 106.116942] ax25_t1timer_expiry+0x1c/0x60 [ 106.116942] call_timer_fn+0x122/0x3d0 [ 106.116942] __run_timers.part.0+0x3f6/0x520 [ 106.116942] run_timer_softirq+0x4f/0xb0 [ 106.116942] __do_softirq+0x1c2/0x651 ... This patch adds del_timer_sync() in ax25_release(), which could ensure that all timers stop before we deallocate ax25_dev. Signed-off-by: Duoming Zhou Signed-off-by: Paolo Abeni [OP: backport to 4.19: adjust context] Signed-off-by: Ovidiu Panait Signed-off-by: Greg Kroah-Hartman Signed-off-by: Lu Wei Reviewed-by: Yue Haibing Reviewed-by: Xiu Jianfeng Reviewed-by: Wei Yongjun Signed-off-by: Yongqiang Liu --- net/ax25/af_ax25.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 1fab69daa968..4ecdde530265 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1053,6 +1053,11 @@ static int ax25_release(struct socket *sock) ax25_destroy_socket(ax25); } if (ax25_dev) { + del_timer_sync(&ax25->timer); + del_timer_sync(&ax25->t1timer); + del_timer_sync(&ax25->t2timer); + del_timer_sync(&ax25->t3timer); + del_timer_sync(&ax25->idletimer); dev_put(ax25_dev->dev); ax25_dev_put(ax25_dev); } -- Gitee From e10b517065fa5fc02bc791b10d830a19a84f271a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 May 2022 11:35:57 +0000 Subject: [PATCH 338/340] Revert "SUNRPC: attempt AF_LOCAL connect on setup" mainline inclusion from mainline-v5.18-rc6 commit a3d0562d4dc039bca39445e1cddde7951662e17d category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I57MRF CVE: NA ------------------------------------------- This reverts commit 7073ea8799a8cf73db60270986f14e4aae20fa80. We must not try to connect the socket while the transport is under construction, because the mechanisms to safely tear it down are not in place. As the code stands, we end up leaking the sockets on a connection error. 
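The underlying rule is the usual init-time unwinding discipline: a step that can fail may only run once every earlier allocation has a matching error label, otherwise a failure leaks whatever was set up before it. A minimal sketch with made-up demo_* names (not the SUNRPC transport code) follows.

  #include <stdio.h>
  #include <stdlib.h>

  struct demo_xprt {
          void *sock;
          void *buf;
  };

  static int demo_connect(struct demo_xprt *x)
  {
          (void)x;
          return -1;                      /* pretend the connect attempt fails */
  }

  static struct demo_xprt *demo_setup(void)
  {
          struct demo_xprt *x = calloc(1, sizeof(*x));

          if (!x)
                  return NULL;

          x->sock = malloc(64);
          if (!x->sock)
                  goto err_free_xprt;

          x->buf = malloc(64);
          if (!x->buf)
                  goto err_free_sock;

          /*
           * Connecting here, before the caller has any way to tear the
           * transport down, only works if this function unwinds everything
           * itself on failure; otherwise the socket is leaked.
           */
          if (demo_connect(x) < 0)
                  goto err_free_buf;

          return x;

  err_free_buf:
          free(x->buf);
  err_free_sock:
          free(x->sock);
  err_free_xprt:
          free(x);
          return NULL;
  }

  int main(void)
  {
          printf("setup %s\n", demo_setup() ? "succeeded" : "failed and was unwound");
          return 0;
  }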
Reported-by: wanghai (M) Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Wang Hai Reviewed-by: Zhang Xiaoxu Reviewed-by: Wei Yongjun Signed-off-by: Yongqiang Liu --- net/sunrpc/xprtsock.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f93d386ae923..38598edcd86e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2978,9 +2978,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) } xprt_set_bound(xprt); xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL); - ret = ERR_PTR(xs_local_setup_socket(transport)); - if (ret) - goto out_err; break; default: ret = ERR_PTR(-EAFNOSUPPORT); -- Gitee From 37c00cf3cbdafd97527feae0b8e52bf788435fef Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 17 May 2022 11:35:58 +0000 Subject: [PATCH 339/340] SUNRPC: Ensure that the gssproxy client can start in a connected state mainline inclusion from mainline-v5.18-rc7 commit fd13359f54ee854f00134abc6be32da94ec53dbf category: bugfix bugzilla: https://gitee.com/src-openeuler/kernel/issues/I57MRF CVE: NA ------------------------------------------- Ensure that the gssproxy client connects to the server from the gssproxy daemon process context so that the AF_LOCAL socket connection is done using the correct path and namespaces. Fixes: 1d658336b05f ("SUNRPC: Add RPC based upcall mechanism for RPCGSS auth") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust conflicts: include/linux/sunrpc/clnt.h net/sunrpc/clnt.c Signed-off-by: Wang Hai Reviewed-by: Zhang Xiaoxu Reviewed-by: Wei Yongjun Signed-off-by: Yongqiang Liu --- include/linux/sunrpc/clnt.h | 1 + net/sunrpc/auth_gss/gss_rpc_upcall.c | 1 + net/sunrpc/clnt.c | 37 ++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 10bcbea6e952..8aa865bce4f6 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -144,6 +144,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8) #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) +#define RPC_CLNT_CREATE_CONNECTED (1UL << 12) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c index 73dcda060335..60fb9529c6ad 100644 --- a/net/sunrpc/auth_gss/gss_rpc_upcall.c +++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c @@ -111,6 +111,7 @@ static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt) * done without the correct namespace: */ .flags = RPC_CLNT_CREATE_NOPING | + RPC_CLNT_CREATE_CONNECTED | RPC_CLNT_CREATE_NO_IDLE_TIMEOUT }; struct rpc_clnt *clnt; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 9ac94c774335..dc58c227f37c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -79,6 +79,7 @@ static void call_connect_status(struct rpc_task *task); static __be32 *rpc_encode_header(struct rpc_task *task); static __be32 *rpc_verify_header(struct rpc_task *task); static int rpc_ping(struct rpc_clnt *clnt); +static int rpc_ping_noreply(struct rpc_clnt *clnt); static void rpc_register_client(struct rpc_clnt *clnt) { @@ -481,6 +482,12 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, rpc_shutdown_client(clnt); return ERR_PTR(err); } + } else if (args->flags & RPC_CLNT_CREATE_CONNECTED) { + int err = rpc_ping_noreply(clnt); + if (err != 0) { + 
rpc_shutdown_client(clnt); + return ERR_PTR(err); + } } clnt->cl_softrtry = 1; @@ -2545,6 +2552,10 @@ static const struct rpc_procinfo rpcproc_null = { .p_decode = rpcproc_decode_null, }; +static const struct rpc_procinfo rpcproc_null_noreply = { + .p_encode = rpcproc_encode_null, +}; + static int rpc_ping(struct rpc_clnt *clnt) { struct rpc_message msg = { @@ -2557,6 +2568,32 @@ static int rpc_ping(struct rpc_clnt *clnt) return err; } +static int rpc_ping_noreply(struct rpc_clnt *clnt) +{ + struct rpc_message msg = { + .rpc_proc = &rpcproc_null_noreply, + }; + struct rpc_task_setup task_setup_data = { + .rpc_client = clnt, + .rpc_message = &msg, + .callback_ops = &rpc_default_ops, + .flags = RPC_TASK_SOFT | RPC_TASK_SOFTCONN, + }; + struct rpc_task *task; + int status; + + msg.rpc_cred = authnull_ops.lookup_cred(NULL, NULL, 0); + task = rpc_run_task(&task_setup_data); + if (IS_ERR(task)) { + put_rpccred(msg.rpc_cred); + return PTR_ERR(task); + } + status = task->tk_status; + rpc_put_task(task); + put_rpccred(msg.rpc_cred); + return status; +} + static struct rpc_task *rpc_call_null_helper(struct rpc_clnt *clnt, struct rpc_xprt *xprt, struct rpc_cred *cred, int flags, -- Gitee From 458d642311d9a39c8c68ab09e0fa60c528d78f7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20L=C3=B6hle?= Date: Tue, 17 May 2022 12:03:16 +0000 Subject: [PATCH 340/340] mmc: block: fix read single on recovery logic mainline inclusion from mainline-v5.17-rc5 commit 54309fde1a352ad2674ebba004a79f7d20b9f037 category: bugfix bugzilla: 186729, https://gitee.com/openeuler/kernel/issues/I578BN CVE: CVE-2022-20008 -------------------------------- On reads with MMC_READ_MULTIPLE_BLOCK that fail, the recovery handler will use MMC_READ_SINGLE_BLOCK for each of the blocks, up to MMC_READ_SINGLE_RETRIES times each. The logic for this is fixed to never report unsuccessful reads as success to the block layer. On command error with retries remaining, blk_update_request was called with whatever value error was set last to. In case it was last set to BLK_STS_OK (default), the read will be reported as success, even though there was no data read from the device. This could happen on a CRC mismatch for the response, a card rejecting the command (e.g. again due to a CRC mismatch). In case it was last set to BLK_STS_IOERR, the error is reported correctly, but no retries will be attempted. Fixes: 81196976ed946c ("mmc: block: Add blk-mq support") Cc: stable@vger.kernel.org Signed-off-by: Christian Loehle Reviewed-by: Adrian Hunter Link: https://lore.kernel.org/r/bc706a6ab08c4fe2834ba0c05a804672@hyperstone.com Signed-off-by: Ulf Hansson Conflict: commit 40c96853fef1 ("mmc: core: Enable re-use of mmc_blk_in_tran_state()") is not backported, mmc_ready_for_data() doesn't exist, use mmc_blk_in_tran_state() instead. 
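The shape of the fixed recovery loop, reduced to a standalone sketch (DEMO_READ_RETRIES and demo_read_block() are placeholders, not the mmc driver code): each block gets a bounded number of single-block attempts, and the request is only left marked OK if one of those attempts actually succeeded.

  #include <stdbool.h>
  #include <stdio.h>

  #define DEMO_READ_RETRIES 3             /* stands in for MMC_READ_SINGLE_RETRIES */

  /* Pretend single-block read that only works on its third attempt. */
  static bool demo_read_block(int blk)
  {
          static int attempts;

          (void)blk;
          return ++attempts >= 3;
  }

  int main(void)
  {
          int status = 0;                 /* 0 plays the role of BLK_STS_OK */
          bool ok = false;

          for (int retry = 0; retry <= DEMO_READ_RETRIES && !ok; retry++)
                  ok = demo_read_block(0);

          if (!ok)
                  status = -1;            /* never report an unread block as OK */

          printf("block read %s, status %d\n", ok ? "succeeded" : "failed", status);
          return 0;
  }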
Signed-off-by: Yu Kuai Reviewed-by: Jason Yan Reviewed-by: Xiu Jianfeng Signed-off-by: Yongqiang Liu --- drivers/mmc/core/block.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/drivers/mmc/core/block.c b/drivers/mmc/core/block.c index 60eac66dc9f0..890a21bc3e28 100644 --- a/drivers/mmc/core/block.c +++ b/drivers/mmc/core/block.c @@ -1658,31 +1658,31 @@ static void mmc_blk_read_single(struct mmc_queue *mq, struct request *req) struct mmc_card *card = mq->card; struct mmc_host *host = card->host; blk_status_t error = BLK_STS_OK; - int retries = 0; do { u32 status; int err; + int retries = 0; - mmc_blk_rw_rq_prep(mqrq, card, 1, mq); + while (retries++ <= MMC_READ_SINGLE_RETRIES) { + mmc_blk_rw_rq_prep(mqrq, card, 1, mq); - mmc_wait_for_req(host, mrq); + mmc_wait_for_req(host, mrq); - err = mmc_send_status(card, &status); - if (err) - goto error_exit; - - if (!mmc_host_is_spi(host) && - !mmc_blk_in_tran_state(status)) { - err = mmc_blk_fix_state(card, req); + err = mmc_send_status(card, &status); if (err) goto error_exit; - } - if (mrq->cmd->error && retries++ < MMC_READ_SINGLE_RETRIES) - continue; + if (!mmc_host_is_spi(host) && + !mmc_blk_in_tran_state(status)) { + err = mmc_blk_fix_state(card, req); + if (err) + goto error_exit; + } - retries = 0; + if (!mrq->cmd->error) + break; + } if (mrq->cmd->error || mrq->data->error || -- Gitee