排序总结

对数器:

对数器用来检测自定义排序算法是否正确。以下排序算法都经过10万个随机数组测试。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
import random


class Comparator():
def compare(self, arr):
return sorted(arr)

def generate_arr(self, max_length, max_value):
length = random.randint(0, max_length)
arr = [random.randint(-max_value, max_value) for i in range(length)]

return arr

def confirm(self, arr1, arr2):
return arr1 == arr2

1. 堆排序

下面这个堆排序写复杂了!!!!,堆还有一个性质是可以利用的:当用数组表示存储n个元素的堆时,叶节点下标分别是[n//2, n//2+1, …, n-1],这样的话我们构建堆的时候只要自下往上对不是叶节点的坐标执行heapify操作就可以了。

堆是一个极其重要的数据结构,堆排序主要是利用了堆的思想,其时间复杂度为O(N*logN),额外空间复杂度为O(1)

  1.将数组调整成大根堆:

    在数组中,如果我们把index i位置的数字看作是根节点,那么它的左子节点在index (2 * i) + 1位置,右子节点在index (2 * i) + 2位置。反之index i位置的节点的父节点的位置在(i - 1) // 2位置。遍历数组添加元素,对每个新添加的元素执行heap_insert操作,从加入点开始上浮,直到上浮到合适位置;

  2.将大根堆顶与数组中最后一个元素交换,然后在不包括最后一个元素的数组区间执行heapify操作

    在堆顶与数组中最后一个元素交换后,我们需要重新调整堆,使得堆顶最大;

  3.重复2,直到可执行区间为1

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
class HeapSort():

def heapsort(self, arr):
if arr is None or len(arr) < 2:
return arr

length = len(arr)
for i in range(length):
self.heap_insert(arr, i)


self.swap(arr, 0, length - 1)
size = length - 2
while size > 0:
self.heapify(arr, 0, size)
self.swap(arr, 0, size)
size -= 1

def heap_insert(self, arr, i):

while i > 0:
parent = (i - 1) // 2
if arr[parent] > arr[i]:
break
else:
self.swap(arr, parent, i)
i = parent

def heapify(self, arr, i, size):
left = 2 * i + 1
while left <= size:
right = 2 * i + 2
largest = right if right <= size and arr[right] >= arr[left] else left
largest = largest if arr[largest] >= arr[i] else i
if largest == i:
break
self.swap(arr, largest, i)
i = largest
left = 2 * i + 1

def swap(self, arr, i, j):
arr[i], arr[j] = arr[j], arr[i]

下面是严格根据算法导论实现的堆排序,其中max_heapify的过程写了递归和非递归两个版本,实测排序10万个随机数组,非递归版本和递归版本分别需要24秒和27秒。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# -*- coding=utf-8 -*-
import time


class HeapSort():

def heapsort(self, arr):
if arr is None or len(arr) < 2:
return arr

self._size = self._length = len(arr)
self.build_max_heap(arr)
for i in range(self._length-1, 0, -1):
self.swap(arr, i, 0)
self._size -= 1
self.max_heapify(arr, 0)

def build_max_heap(self, arr):
for i in range(self._length//2-1, -1, -1):
self.max_heapify(arr, i)

def max_heapify(self, arr, i):
left = 2 * i + 1
while left < self._size:
right = 2 * i + 2
largest = right if right < self._size and arr[right] >= arr[left] else left
largest = largest if arr[largest] >= arr[i] else i
if largest == i:
break
self.swap(arr, largest, i)
i = largest
left = 2 * i + 1

def max_heapify_recur(self, arr, i):
left = 2 * i + 1
right = 2 * i + 2
if left < self._size and arr[left] > arr[i]:
largest = left
else:
largest = i

if right < self._size and arr[right] > arr[largest]:
largest = right

if largest != i:
self.swap(arr, largest, i)
self.max_heapify_recur(arr, largest)

def swap(self, arr, i, j):
arr[i], arr[j] = arr[j], arr[i]


if __name__ == "__main__":
from comparator import Comparator
com = Comparator()
ex = HeapSort()
start = time.time()
for i in range(100000):
arr = com.generate_arr(100, 100)
arr1 = list(arr)

ex.heapsort(arr1)
arr2 = com.compare(arr)

if not com.confirm(arr1, arr2):
print('oops')
print(arr, arr1, arr2)
print(time.time()-start)

2. 快排

随机快排就是随机+partition的过程。partition过程其实就是荷兰国旗问题。

代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
import random


class QuickSort():
def quick_min(self, arr):
if arr is None or len(arr) < 2:
return arr

self.quick_sort(arr, 0, len(arr) - 1)

def quick_sort(self, arr, l, r):
if l < r:
self.swap(arr, random.randint(l, r), r)
left, right = self.partation(arr, l, r)
self.quick_sort(arr, l, left)
self.quick_sort(arr, right, r)

def partation(self, arr, l, r):
left = l - 1
right = r
while l < right:
if arr[l] < arr[r]:
left += 1
self.swap(arr, l, left)
l += 1
elif arr[l] == arr[r]:
l += 1
else:
right -= 1
self.swap(arr, l, right)

self.swap(arr, right, r)
return left, right + 1

def swap(self, arr, i, j):
arr[i], arr[j] = arr[j], arr[i]

3. 归并

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import time


class MergeSort():
def merge_main(self, nums):
length = len(nums)
if length <= 1:
return nums
start = 0
end = length - 1
self.merge_sort_recur(nums, start, end)

def merge_sort(self, nums):
length = len(nums)
if length <= 1:
return nums
i = 1
while i < length:
start = 0
while start < length:
middle = start + i - 1
end = min(start + 2 * i - 1, length - 1)
if middle < end:
self.merge(nums, start, end, middle)
start += 2 * i
i *= 2

def merge_sort_recur(self, nums, start, end):
if start == end:
return
middle = start + ((end - start) >> 1)
self.merge_sort_recur(nums, start, middle)
self.merge_sort_recur(nums, middle+1, end)
self.merge(nums, start, end, middle)

def merge(self, nums, start, end, middle):
cur_left = start
cur_right = middle + 1
temp_list = []
while cur_left <= middle and cur_right <= end:
if nums[cur_left] <= nums[cur_right]:
temp_list.append(nums[cur_left])
cur_left += 1
else:
temp_list.append(nums[cur_right])
cur_right += 1

while cur_left <= middle:
temp_list.append(nums[cur_left])
cur_left += 1
while cur_right <= end:
temp_list.append(nums[cur_right])
cur_right += 1

for i in range(start, end+1):
nums[i] = temp_list[i - start]


def main():
from comparator import Comparator
com = Comparator()
ex = MergeSort()
start = time.time()
for i in range(100000):
arr = com.generate_arr(100, 100)
arr1 = list(arr)

ex.merge_sort(arr)
arr2 = com.compare(arr1)

if not com.confirm(arr, arr2):
print('oops')
print(arr1, arr, arr2)
print(time.time()-start)

for i in range(100000):
arr = com.generate_arr(100, 100)
arr1 = list(arr)

ex.merge_main(arr)
arr2 = com.compare(arr1)

if not com.confirm(arr, arr2):
print('oops')
print(arr1, arr, arr2)
print(time.time()-start)


if __name__ == '__main__':
main()

output:

1
2
21.85367202758789
42.81224775314331

可以看出非递归版还是快啊,10万个最大长度为100的数组,时间差达到非递归用时了。。

  • 本文作者: 大护法
  • 本文链接: https://todebug.com/summary-of-sort/
  • 版权声明: 本博客所有文章除特别声明外,均采用 BY-NC-SA 4.0 许可协议。转载请注明出处!
0%