@@ -257,6 +257,47 @@ static inline void cluster_set_null(struct swap_cluster_info *info)
 	info->data = 0;
 }
 
+static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
+						     unsigned long offset)
+{
+	struct swap_cluster_info *ci;
+
+	ci = si->cluster_info;
+	if (ci) {
+		ci += offset / SWAPFILE_CLUSTER;
+		spin_lock(&ci->lock);
+	}
+	return ci;
+}
+
+static inline void unlock_cluster(struct swap_cluster_info *ci)
+{
+	if (ci)
+		spin_unlock(&ci->lock);
+}
+
+static inline struct swap_cluster_info *lock_cluster_or_swap_info(
+	struct swap_info_struct *si,
+	unsigned long offset)
+{
+	struct swap_cluster_info *ci;
+
+	ci = lock_cluster(si, offset);
+	if (!ci)
+		spin_lock(&si->lock);
+
+	return ci;
+}
+
+static inline void unlock_cluster_or_swap_info(struct swap_info_struct *si,
+					       struct swap_cluster_info *ci)
+{
+	if (ci)
+		unlock_cluster(ci);
+	else
+		spin_unlock(&si->lock);
+}
+
 static inline bool cluster_list_empty(struct swap_cluster_list *list)
 {
 	return cluster_is_null(&list->head);
@@ -281,9 +322,17 @@ static void cluster_list_add_tail(struct swap_cluster_list *list,
 		cluster_set_next_flag(&list->head, idx, 0);
 		cluster_set_next_flag(&list->tail, idx, 0);
 	} else {
+		struct swap_cluster_info *ci_tail;
 		unsigned int tail = cluster_next(&list->tail);
 
-		cluster_set_next(&ci[tail], idx);
+		/*
+		 * Nested cluster lock, but both cluster locks are
+		 * only acquired when we held swap_info_struct->lock
+		 */
+		ci_tail = ci + tail;
+		spin_lock_nested(&ci_tail->lock, SINGLE_DEPTH_NESTING);
+		cluster_set_next(ci_tail, idx);
+		unlock_cluster(ci_tail);
 		cluster_set_next_flag(&list->tail, idx, 0);
 	}
 }
@@ -328,7 +377,7 @@ static void swap_cluster_schedule_discard(struct swap_info_struct *si,
  */
 static void swap_do_scheduled_discard(struct swap_info_struct *si)
 {
-	struct swap_cluster_info *info;
+	struct swap_cluster_info *info, *ci;
 	unsigned int idx;
 
 	info = si->cluster_info;
@@ -341,10 +390,14 @@ static void swap_do_scheduled_discard(struct swap_info_struct *si)
 				SWAPFILE_CLUSTER);
 
 		spin_lock(&si->lock);
-		cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE);
+		ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
+		cluster_set_flag(ci, CLUSTER_FLAG_FREE);
+		unlock_cluster(ci);
 		cluster_list_add_tail(&si->free_clusters, info, idx);
+		ci = lock_cluster(si, idx * SWAPFILE_CLUSTER);
 		memset(si->swap_map + idx * SWAPFILE_CLUSTER,
 				0, SWAPFILE_CLUSTER);
+		unlock_cluster(ci);
 	}
 }
 
@@ -447,8 +500,9 @@ static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
 	unsigned long *offset, unsigned long *scan_base)
 {
 	struct percpu_cluster *cluster;
+	struct swap_cluster_info *ci;
 	bool found_free;
-	unsigned long tmp;
+	unsigned long tmp, max;
 
 new_cluster:
 	cluster = this_cpu_ptr(si->percpu_cluster);
@@ -476,14 +530,21 @@ new_cluster:
 	 * check if there is still free entry in the cluster
 	 */
 	tmp = cluster->next;
-	while (tmp < si->max && tmp < (cluster_next(&cluster->index) + 1) *
-	       SWAPFILE_CLUSTER) {
+	max = min_t(unsigned long, si->max,
+		    (cluster_next(&cluster->index) + 1) * SWAPFILE_CLUSTER);
+	if (tmp >= max) {
+		cluster_set_null(&cluster->index);
+		goto new_cluster;
+	}
+	ci = lock_cluster(si, tmp);
+	while (tmp < max) {
 		if (!si->swap_map[tmp]) {
 			found_free = true;
 			break;
 		}
 		tmp++;
 	}
+	unlock_cluster(ci);
 	if (!found_free) {
 		cluster_set_null(&cluster->index);
 		goto new_cluster;
@@ -496,6 +557,7 @@ new_cluster:
 static unsigned long scan_swap_map(struct swap_info_struct *si,
 				   unsigned char usage)
 {
+	struct swap_cluster_info *ci;
 	unsigned long offset;
 	unsigned long scan_base;
 	unsigned long last_in_cluster = 0;
@@ -572,9 +634,11 @@ checks:
 	if (offset > si->highest_bit)
 		scan_base = offset = si->lowest_bit;
 
+	ci = lock_cluster(si, offset);
 	/* reuse swap entry of cache-only swap if not busy. */
 	if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
 		int swap_was_freed;
+		unlock_cluster(ci);
 		spin_unlock(&si->lock);
 		swap_was_freed = __try_to_reclaim_swap(si, offset);
 		spin_lock(&si->lock);
@@ -584,8 +648,10 @@ checks:
 		goto scan; /* check next one */
 	}
 
-	if (si->swap_map[offset])
+	if (si->swap_map[offset]) {
+		unlock_cluster(ci);
 		goto scan;
+	}
 
 	if (offset == si->lowest_bit)
 		si->lowest_bit++;
@@ -601,6 +667,7 @@ checks:
 	}
 	si->swap_map[offset] = usage;
 	inc_cluster_info_page(si, si->cluster_info, offset);
+	unlock_cluster(ci);
 	si->cluster_next = offset + 1;
 	si->flags -= SWP_SCANNING;
 
@@ -731,7 +798,7 @@ swp_entry_t get_swap_page_of_type(int type)
 	return (swp_entry_t) {0};
 }
 
-static struct swap_info_struct *swap_info_get(swp_entry_t entry)
+static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
 	unsigned long offset, type;
@@ -749,7 +816,6 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry)
 		goto bad_offset;
 	if (!p->swap_map[offset])
 		goto bad_free;
-	spin_lock(&p->lock);
 	return p;
 
 bad_free:
@@ -767,14 +833,45 @@ out:
 	return NULL;
 }
 
+static struct swap_info_struct *swap_info_get(swp_entry_t entry)
+{
+	struct swap_info_struct *p;
+
+	p = _swap_info_get(entry);
+	if (p)
+		spin_lock(&p->lock);
+	return p;
+}
+
 static unsigned char swap_entry_free(struct swap_info_struct *p,
-				     swp_entry_t entry, unsigned char usage)
+				     swp_entry_t entry, unsigned char usage,
+				     bool swap_info_locked)
 {
+	struct swap_cluster_info *ci;
 	unsigned long offset = swp_offset(entry);
 	unsigned char count;
 	unsigned char has_cache;
+	bool lock_swap_info = false;
+
+	if (!swap_info_locked) {
+		count = p->swap_map[offset];
+		if (!p->cluster_info || count == usage || count == SWAP_MAP_SHMEM) {
+lock_swap_info:
+			swap_info_locked = true;
+			lock_swap_info = true;
+			spin_lock(&p->lock);
+		}
+	}
+
+	ci = lock_cluster(p, offset);
 
 	count = p->swap_map[offset];
+
+	if (!swap_info_locked && (count == usage || count == SWAP_MAP_SHMEM)) {
+		unlock_cluster(ci);
+		goto lock_swap_info;
+	}
+
 	has_cache = count & SWAP_HAS_CACHE;
 	count &= ~SWAP_HAS_CACHE;
 
@@ -800,10 +897,15 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 	usage = count | has_cache;
 	p->swap_map[offset] = usage;
 
+	unlock_cluster(ci);
+
 	/* free if no reference */
 	if (!usage) {
+		VM_BUG_ON(!swap_info_locked);
 		mem_cgroup_uncharge_swap(entry);
+		ci = lock_cluster(p, offset);
 		dec_cluster_info_page(p, p->cluster_info, offset);
+		unlock_cluster(ci);
 		if (offset < p->lowest_bit)
 			p->lowest_bit = offset;
 		if (offset > p->highest_bit) {
@@ -829,6 +931,9 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 		}
 	}
 
+	if (lock_swap_info)
+		spin_unlock(&p->lock);
+
 	return usage;
 }
 
@@ -840,11 +945,9 @@ void swap_free(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
 
-	p = swap_info_get(entry);
-	if (p) {
-		swap_entry_free(p, entry, 1);
-		spin_unlock(&p->lock);
-	}
+	p = _swap_info_get(entry);
+	if (p)
+		swap_entry_free(p, entry, 1, false);
 }
 
 /*
@@ -854,11 +957,9 @@ void swapcache_free(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
 
-	p = swap_info_get(entry);
-	if (p) {
-		swap_entry_free(p, entry, SWAP_HAS_CACHE);
-		spin_unlock(&p->lock);
-	}
+	p = _swap_info_get(entry);
+	if (p)
+		swap_entry_free(p, entry, SWAP_HAS_CACHE, false);
 }
 
 /*
@@ -870,13 +971,17 @@ int page_swapcount(struct page *page)
 {
 	int count = 0;
 	struct swap_info_struct *p;
+	struct swap_cluster_info *ci;
 	swp_entry_t entry;
+	unsigned long offset;
 
 	entry.val = page_private(page);
-	p = swap_info_get(entry);
+	p = _swap_info_get(entry);
 	if (p) {
-		count = swap_count(p->swap_map[swp_offset(entry)]);
-		spin_unlock(&p->lock);
+		offset = swp_offset(entry);
+		ci = lock_cluster_or_swap_info(p, offset);
+		count = swap_count(p->swap_map[offset]);
+		unlock_cluster_or_swap_info(p, ci);
 	}
 	return count;
 }
@@ -889,22 +994,26 @@ int swp_swapcount(swp_entry_t entry)
 {
 	int count, tmp_count, n;
 	struct swap_info_struct *p;
+	struct swap_cluster_info *ci;
 	struct page *page;
 	pgoff_t offset;
 	unsigned char *map;
 
-	p = swap_info_get(entry);
+	p = _swap_info_get(entry);
 	if (!p)
 		return 0;
 
-	count = swap_count(p->swap_map[swp_offset(entry)]);
+	offset = swp_offset(entry);
+
+	ci = lock_cluster_or_swap_info(p, offset);
+
+	count = swap_count(p->swap_map[offset]);
 	if (!(count & COUNT_CONTINUED))
 		goto out;
 
 	count &= ~COUNT_CONTINUED;
 	n = SWAP_MAP_MAX + 1;
 
-	offset = swp_offset(entry);
 	page = vmalloc_to_page(p->swap_map + offset);
 	offset &= ~PAGE_MASK;
 	VM_BUG_ON(page_private(page) != SWP_CONTINUED);
@@ -919,7 +1028,7 @@ int swp_swapcount(swp_entry_t entry)
 		n *= (SWAP_CONT_MAX + 1);
 	} while (tmp_count & COUNT_CONTINUED);
 out:
-	spin_unlock(&p->lock);
+	unlock_cluster_or_swap_info(p, ci);
 	return count;
 }
 
@@ -1017,7 +1126,7 @@ int free_swap_and_cache(swp_entry_t entry)
 
 	p = swap_info_get(entry);
 	if (p) {
-		if (swap_entry_free(p, entry, 1) == SWAP_HAS_CACHE) {
+		if (swap_entry_free(p, entry, 1, true) == SWAP_HAS_CACHE) {
 			page = find_get_page(swap_address_space(entry),
 					     swp_offset(entry));
 			if (page && !trylock_page(page)) {
@@ -2298,6 +2407,9 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 	return maxpages;
 }
 
+#define SWAP_CLUSTER_COLS						\
+	DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
+
 static int setup_swap_map_and_extents(struct swap_info_struct *p,
 					union swap_header *swap_header,
 					unsigned char *swap_map,
@@ -2305,11 +2417,12 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
 					unsigned long maxpages,
 					sector_t *span)
 {
-	int i;
+	unsigned int j, k;
 	unsigned int nr_good_pages;
 	int nr_extents;
 	unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
-	unsigned long idx = p->cluster_next / SWAPFILE_CLUSTER;
+	unsigned long col = p->cluster_next / SWAPFILE_CLUSTER % SWAP_CLUSTER_COLS;
+	unsigned long i, idx;
 
 	nr_good_pages = maxpages - 1;	/* omit header page */
 
@@ -2357,15 +2470,20 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
 	if (!cluster_info)
 		return nr_extents;
 
-	for (i = 0; i < nr_clusters; i++) {
-		if (!cluster_count(&cluster_info[idx])) {
+
+	/* Reduce false cache line sharing between cluster_info */
+	for (k = 0; k < SWAP_CLUSTER_COLS; k++) {
+		j = (k + col) % SWAP_CLUSTER_COLS;
+		for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
+			idx = i * SWAP_CLUSTER_COLS + j;
+			if (idx >= nr_clusters)
+				continue;
+			if (cluster_count(&cluster_info[idx]))
+				continue;
 			cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
 			cluster_list_add_tail(&p->free_clusters, cluster_info,
 					      idx);
 		}
-		idx++;
-		if (idx == nr_clusters)
-			idx = 0;
 	}
 	return nr_extents;
 }
@@ -2468,6 +2586,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 
 	if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
 		int cpu;
+		unsigned long ci, nr_cluster;
 
 		p->flags |= SWP_SOLIDSTATE;
 		/*
@@ -2475,13 +2594,17 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		 * SSD
 		 */
 		p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
+		nr_cluster = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
 
-		cluster_info = vzalloc(DIV_ROUND_UP(maxpages,
-			SWAPFILE_CLUSTER) * sizeof(*cluster_info));
+		cluster_info = vzalloc(nr_cluster * sizeof(*cluster_info));
 		if (!cluster_info) {
 			error = -ENOMEM;
 			goto bad_swap;
 		}
+
+		for (ci = 0; ci < nr_cluster; ci++)
+			spin_lock_init(&((cluster_info + ci)->lock));
+
 		p->percpu_cluster = alloc_percpu(struct percpu_cluster);
 		if (!p->percpu_cluster) {
 			error = -ENOMEM;
@@ -2627,6 +2750,7 @@ void si_swapinfo(struct sysinfo *val)
 static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 {
 	struct swap_info_struct *p;
+	struct swap_cluster_info *ci;
 	unsigned long offset, type;
 	unsigned char count;
 	unsigned char has_cache;
@@ -2640,10 +2764,10 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 		goto bad_file;
 	p = swap_info[type];
 	offset = swp_offset(entry);
-
-	spin_lock(&p->lock);
 	if (unlikely(offset >= p->max))
-		goto unlock_out;
+		goto out;
+
+	ci = lock_cluster_or_swap_info(p, offset);
 
 	count = p->swap_map[offset];
 
@@ -2686,7 +2810,7 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
 	p->swap_map[offset] = count | has_cache;
 
 unlock_out:
-	spin_unlock(&p->lock);
+	unlock_cluster_or_swap_info(p, ci);
 out:
 	return err;
 
@@ -2775,6 +2899,7 @@ EXPORT_SYMBOL_GPL(__page_file_index);
 int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
 {
 	struct swap_info_struct *si;
+	struct swap_cluster_info *ci;
 	struct page *head;
 	struct page *page;
 	struct page *list_page;
@@ -2798,6 +2923,9 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
 	}
 
 	offset = swp_offset(entry);
+
+	ci = lock_cluster(si, offset);
+
 	count = si->swap_map[offset] & ~SWAP_HAS_CACHE;
 
 	if ((count & ~COUNT_CONTINUED) != SWAP_MAP_MAX) {
@@ -2810,6 +2938,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
 	}
 
 	if (!page) {
+		unlock_cluster(ci);
 		spin_unlock(&si->lock);
 		return -ENOMEM;
 	}
@@ -2858,6 +2987,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
 	list_add_tail(&page->lru, &head->lru);
 	page = NULL;			/* now it's attached, don't free it */
 out:
+	unlock_cluster(ci);
 	spin_unlock(&si->lock);
 outer:
 	if (page)
@@ -2871,7 +3001,8 @@ outer:
 * into, carry if so, or else fail until a new continuation page is allocated;
 * when the original swap_map count is decremented from 0 with continuation,
 * borrow from the continuation and report whether it still holds more.
- * Called while __swap_duplicate() or swap_entry_free() holds swap_lock.
+ * Called while __swap_duplicate() or swap_entry_free() holds swap or cluster
+ * lock.
 */
 static bool swap_count_continued(struct swap_info_struct *si,
 				 pgoff_t offset, unsigned char count)
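
Note for readers: the following is a minimal standalone sketch, not kernel code, of the locking discipline the new helpers encode. It compiles in userspace (cc -pthread); pthread spinlocks and the trimmed-down structs are stand-ins for the kernel's spinlock_t, struct swap_cluster_info and struct swap_info_struct, so names and layout here are illustrative assumptions only. The pattern: take the per-cluster lock when cluster_info exists (non-rotational swap), and fall back to the device-wide lock otherwise.

/*
 * Userspace sketch of lock_cluster() / lock_cluster_or_swap_info().
 * NOT kernel code: pthread spinlocks model the kernel's spinlock_t.
 */
#include <pthread.h>
#include <stdio.h>

#define SWAPFILE_CLUSTER 256	/* pages per cluster, as in the kernel */
#define NR_CLUSTERS 2

struct swap_cluster_info {
	pthread_spinlock_t lock;	/* protects one cluster's swap_map slice */
	unsigned int data;
};

struct swap_info_struct {
	pthread_spinlock_t lock;		/* device-wide lock */
	struct swap_cluster_info *cluster_info;	/* NULL on rotational media */
	unsigned char *swap_map;
};

/* Lock the cluster covering @offset; returns NULL when there are no clusters. */
static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
					      unsigned long offset)
{
	struct swap_cluster_info *ci = si->cluster_info;

	if (ci) {
		ci += offset / SWAPFILE_CLUSTER;
		pthread_spin_lock(&ci->lock);
	}
	return ci;
}

static void unlock_cluster(struct swap_cluster_info *ci)
{
	if (ci)
		pthread_spin_unlock(&ci->lock);
}

/* Fine-grained cluster lock when available, device-wide lock otherwise. */
static struct swap_cluster_info *
lock_cluster_or_swap_info(struct swap_info_struct *si, unsigned long offset)
{
	struct swap_cluster_info *ci = lock_cluster(si, offset);

	if (!ci)
		pthread_spin_lock(&si->lock);
	return ci;
}

static void unlock_cluster_or_swap_info(struct swap_info_struct *si,
					struct swap_cluster_info *ci)
{
	if (ci)
		unlock_cluster(ci);
	else
		pthread_spin_unlock(&si->lock);
}

int main(void)
{
	static unsigned char map[NR_CLUSTERS * SWAPFILE_CLUSTER];
	struct swap_cluster_info clusters[NR_CLUSTERS];
	struct swap_info_struct si = { .swap_map = map };
	struct swap_cluster_info *ci;
	int i;

	pthread_spin_init(&si.lock, PTHREAD_PROCESS_PRIVATE);
	for (i = 0; i < NR_CLUSTERS; i++)
		pthread_spin_init(&clusters[i].lock, PTHREAD_PROCESS_PRIVATE);

	/* Rotational case: no cluster_info, so we fall back to si->lock. */
	ci = lock_cluster_or_swap_info(&si, 300);
	si.swap_map[300] = 1;
	unlock_cluster_or_swap_info(&si, ci);	/* ci == NULL here */

	/* SSD case: per-cluster lock; offset 300 maps to clusters[1]. */
	si.cluster_info = clusters;
	ci = lock_cluster_or_swap_info(&si, 300);
	si.swap_map[300] = 2;
	unlock_cluster_or_swap_info(&si, ci);	/* ci == &clusters[1] */

	printf("swap_map[300] = %d\n", si.swap_map[300]);
	return 0;
}

This mirrors why swap_entry_free() above grows a swap_info_locked parameter: simple count updates can ride on the cluster lock alone, but paths that may free the entry still take the device-wide lock, since freeing touches device-wide state (lowest_bit/highest_bit, the free-cluster list).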