Ordered Queues: queue

I came across this ordered queue on a well-known blogger's site, and it's pretty interesting.

Python's queue module provides a FIFO queue: first in, first out. queue.Queue(maxsize)

LifoQueue works like a stack: last in, first out. queue.LifoQueue(maxsize)

There is also a priority queue, where the item with the lowest priority value comes out first. queue.PriorityQueue(maxsize)
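
A quick sketch to make the three orderings concrete (standard library only, nothing site-specific):

import queue

fifo = queue.Queue()
lifo = queue.LifoQueue()
prio = queue.PriorityQueue()

for n in (1, 3, 2):
    fifo.put(n)
    lifo.put(n)
    prio.put(n)

# FIFO: items come out in insertion order
print([fifo.get() for _ in range(3)])  # [1, 3, 2]
# LIFO: items come out in reverse insertion order (a stack)
print([lifo.get() for _ in range(3)])  # [2, 3, 1]
# Priority: items come out smallest first
print([prio.get() for _ in range(3)])  # [1, 2, 3]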

import queue

# Priority queue with a maximum size of 5
q = queue.PriorityQueue(5)
# Items are (priority, value) tuples; the lowest priority comes out first
q.put((5, 5454))
q.put((2, 532566))
q.put((3, 555))
q.put((7, 344))
print(q.get())
print(q.get())

-----------
(2, 532566)
(3, 555)
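One thing to be aware of: PriorityQueue orders entries by ordinary tuple comparison, so if two entries share the same priority, Python falls back to comparing the payloads, which raises TypeError when they aren't comparable (dicts, for example). A common workaround, sketched below, is to add a monotonically increasing counter as a tie-breaker:

import itertools
import queue

q = queue.PriorityQueue()
counter = itertools.count()  # monotonically increasing tie-breaker

# Without the counter, two entries with priority 1 would have to compare
# their dict payloads and raise TypeError; the counter decides ties instead.
q.put((1, next(counter), {'title': 'chapter one'}))
q.put((1, next(counter), {'title': 'chapter two'}))
print(q.get())  # (1, 0, {'title': 'chapter one'})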

My Python script

Chapters are sorted by their novel chapter ID, e.g. (1, '…')

import time
import requests
from lxml import etree
from multiprocessing.dummy import Pool as ThreadPool
from urllib.parse import urljoin
from queue import PriorityQueue

# Priority queue, usable globally, used to store the chapter contents
queue = PriorityQueue()
# Table-of-contents page URL
start_url = 'http://www.ybiquge.com/95_95524/'
headers = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'
}


# GET a URL and return the page HTML
def get_url(url):
    print("Requesting url: {}".format(url))
    try:
        # headers must be passed as a keyword argument;
        # requests.get(url, headers) would send them as query params
        res = requests.get(url, headers=headers)
        return res.text
    except requests.RequestException:
        return False


# Get the novel's chapter links
def get_page():
    # GET the table-of-contents page
    page_html = get_url(start_url)
    if page_html:
        # Parse with lxml
        page_tree = etree.HTML(page_html)
        # Book title
        book_name = page_tree.xpath('//div[@id="info"]/h1/text()')[0]
        print(book_name)
        # Chapter links
        page_url_lists = page_tree.xpath('//div[@id="list"]/dl/dd/a/@href')
        return book_name, page_url_lists
    else:
        return False


# Get a chapter's content
def get_text(url):
    # Join the relative link with the base URL first
    url = urljoin(start_url, url)
    # GET the chapter page
    zj_html = get_url(url)
    if zj_html:
        # Parse with lxml
        page_tree = etree.HTML(zj_html)
        # Chapter title
        zj_name = page_tree.xpath('//div[@class="bookname"]/h1/text()')[0]
        print(zj_name)
        # Chapter body
        zj_text_list = page_tree.xpath('//div[@id="content"]/text()')
        # Merge the text fragments
        zj_text = ''
        for text in zj_text_list:
            zj_text = zj_text + text + '\n'
        print('Content:', zj_text[0:15], '...')
        # Submit to the priority queue:
        # extract the chapter id from the URL
        zj_id = url.split('/')[-1].replace('.html', '')
        # put (id, content) so chapters can be retrieved in order later
        queue.put((int(zj_id), zj_name + '\n' + zj_text + '\n'))
    # Add a delay (be reasonable)
    time.sleep(1)


# Save to file
def down_text(name, texts):
    path = name + '.txt'
    with open(path, 'a', encoding='utf-8') as a:
        a.writelines(texts)


if __name__ == '__main__':
    start_time = time.time()
    # Get the book title and the list of chapter links
    xs_book_name, zj_lists = get_page()
    # Create a thread pool with 4 threads
    pool = ThreadPool(4)
    # Map the chapter links onto the pool
    pool.map(get_text, zj_lists)
    # Close the pool and wait for all threads to finish
    pool.close()
    pool.join()
    # Retrieve the chapter contents in order
    print("Saving the novel, please wait...")
    content = ""
    # queue.empty() returns True when the queue is empty
    while not queue.empty():
        # get() returns the smallest chapter id first
        next_obj = queue.get()
        # print(next_obj[0])
        content += next_obj[1]
    # Save
    down_text(xs_book_name, content)
    print(xs_book_name, "download complete!")
    print("Total time:", time.time() - start_time)

Afterword

Before I knew it, I've written quite a few of these novel-downloading Python scripts.

I just love reading novels, nothing more to it!

