Skip to content

Commit f3fcf9d

Browse files
Luis Henriques authored and gregkh committed
ceph: reduce contention in ceph_check_delayed_caps()
commit bf2ba43 upstream. Function ceph_check_delayed_caps() is called from the mdsc->delayed_work workqueue and it can be kept looping for quite some time if caps keep being added back to the mdsc->cap_delay_list. This may result in the watchdog tainting the kernel with the softlockup flag. This patch breaks this loop if the caps have been added recently (i.e. during the loop execution). Any new caps added to the list will be handled in the next run. Also, allow schedule_delayed() callers to explicitly set the delay value instead of defaulting to 5s, so we can ensure that it runs soon afterward if it looks like there is more work. Cc: stable@vger.kernel.org URL: https://tracker.ceph.com/issues/46284 Signed-off-by: Luis Henriques <lhenriques@suse.de> Reviewed-by: Jeff Layton <jlayton@kernel.org> Signed-off-by: Ilya Dryomov <idryomov@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
1 parent ca6dea4 commit f3fcf9d

3 files changed

Lines changed: 33 additions & 11 deletions

File tree

fs/ceph/caps.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4202,18 +4202,31 @@ void ceph_handle_caps(struct ceph_mds_session *session,
42024202

42034203
/*
42044204
* Delayed work handler to process end of delayed cap release LRU list.
4205+
*
4206+
* If new caps are added to the list while processing it, these won't get
4207+
* processed in this run. In this case, the ci->i_hold_caps_max will be
4208+
* returned so that the work can be scheduled accordingly.
42054209
*/
4206-
void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
4210+
unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
42074211
{
42084212
struct inode *inode;
42094213
struct ceph_inode_info *ci;
4214+
struct ceph_mount_options *opt = mdsc->fsc->mount_options;
4215+
unsigned long delay_max = opt->caps_wanted_delay_max * HZ;
4216+
unsigned long loop_start = jiffies;
4217+
unsigned long delay = 0;
42104218

42114219
dout("check_delayed_caps\n");
42124220
spin_lock(&mdsc->cap_delay_lock);
42134221
while (!list_empty(&mdsc->cap_delay_list)) {
42144222
ci = list_first_entry(&mdsc->cap_delay_list,
42154223
struct ceph_inode_info,
42164224
i_cap_delay_list);
4225+
if (time_before(loop_start, ci->i_hold_caps_max - delay_max)) {
4226+
dout("%s caps added recently. Exiting loop", __func__);
4227+
delay = ci->i_hold_caps_max;
4228+
break;
4229+
}
42174230
if ((ci->i_ceph_flags & CEPH_I_FLUSH) == 0 &&
42184231
time_before(jiffies, ci->i_hold_caps_max))
42194232
break;
@@ -4230,6 +4243,8 @@ void ceph_check_delayed_caps(struct ceph_mds_client *mdsc)
42304243
}
42314244
}
42324245
spin_unlock(&mdsc->cap_delay_lock);
4246+
4247+
return delay;
42334248
}
42344249

42354250
/*

fs/ceph/mds_client.c

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4435,22 +4435,29 @@ void inc_session_sequence(struct ceph_mds_session *s)
44354435
}
44364436

44374437
/*
4438-
* delayed work -- periodically trim expired leases, renew caps with mds
4438+
* delayed work -- periodically trim expired leases, renew caps with mds. If
4439+
* the @delay parameter is set to 0 or if it's more than 5 secs, the default
4440+
* workqueue delay value of 5 secs will be used.
44394441
*/
4440-
static void schedule_delayed(struct ceph_mds_client *mdsc)
4442+
static void schedule_delayed(struct ceph_mds_client *mdsc, unsigned long delay)
44414443
{
4442-
int delay = 5;
4443-
unsigned hz = round_jiffies_relative(HZ * delay);
4444-
schedule_delayed_work(&mdsc->delayed_work, hz);
4444+
unsigned long max_delay = HZ * 5;
4445+
4446+
/* 5 secs default delay */
4447+
if (!delay || (delay > max_delay))
4448+
delay = max_delay;
4449+
schedule_delayed_work(&mdsc->delayed_work,
4450+
round_jiffies_relative(delay));
44454451
}
44464452

44474453
static void delayed_work(struct work_struct *work)
44484454
{
4449-
int i;
44504455
struct ceph_mds_client *mdsc =
44514456
container_of(work, struct ceph_mds_client, delayed_work.work);
4457+
unsigned long delay;
44524458
int renew_interval;
44534459
int renew_caps;
4460+
int i;
44544461

44554462
dout("mdsc delayed_work\n");
44564463

@@ -4490,15 +4497,15 @@ static void delayed_work(struct work_struct *work)
44904497
}
44914498
mutex_unlock(&mdsc->mutex);
44924499

4493-
ceph_check_delayed_caps(mdsc);
4500+
delay = ceph_check_delayed_caps(mdsc);
44944501

44954502
ceph_queue_cap_reclaim_work(mdsc);
44964503

44974504
ceph_trim_snapid_map(mdsc);
44984505

44994506
maybe_recover_session(mdsc);
45004507

4501-
schedule_delayed(mdsc);
4508+
schedule_delayed(mdsc, delay);
45024509
}
45034510

45044511
int ceph_mdsc_init(struct ceph_fs_client *fsc)
@@ -4984,7 +4991,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
49844991
mdsc->mdsmap->m_epoch);
49854992

49864993
mutex_unlock(&mdsc->mutex);
4987-
schedule_delayed(mdsc);
4994+
schedule_delayed(mdsc, 0);
49884995
return;
49894996

49904997
bad_unlock:

fs/ceph/super.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1138,7 +1138,7 @@ extern void ceph_flush_snaps(struct ceph_inode_info *ci,
11381138
extern bool __ceph_should_report_size(struct ceph_inode_info *ci);
11391139
extern void ceph_check_caps(struct ceph_inode_info *ci, int flags,
11401140
struct ceph_mds_session *session);
1141-
extern void ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
1141+
extern unsigned long ceph_check_delayed_caps(struct ceph_mds_client *mdsc);
11421142
extern void ceph_flush_dirty_caps(struct ceph_mds_client *mdsc);
11431143
extern int ceph_drop_caps_for_unlink(struct inode *inode);
11441144
extern int ceph_encode_inode_release(void **p, struct inode *inode,

0 commit comments

Comments
 (0)