Published on

python_多进程为什么慢

笔记: https://www.bilibili.com/video/BV1q94y1N7pJ

v1 代码 -串行处理


import time
from utils import is_prime

MAX_NUMBER = 100_000

# Start the clock right before the serial scan begins.
start = time.time()

# Test every candidate in [1, MAX_NUMBER) one after another in this process.
primes = [candidate for candidate in range(1, MAX_NUMBER) if is_prime(candidate)]

elapsed = time.time() - start
print(f"Took {elapsed} seconds")
print(f"Got {len(primes)} primes")


"""""
Took 0.06995820999145508 seconds
Got 9592 primes
"""""

v2代码 -并行处理

import multiprocessing
from utils import is_prime
import time

# Exclusive upper bound of the integers tested for primality.
MAX_NUMBER = 100_000

# Primes collected by the parent process from the output queue.
primes = []

def work(inq, outq):
    """Test numbers taken from ``inq`` until a ``None`` sentinel arrives.

    Every item pulled from ``inq`` that ``is_prime`` accepts is put on
    ``outq`` as its own message. When ``None`` is received the worker puts a
    single ``None`` on ``outq`` to signal that it is done, then returns.

    Args:
        inq: Queue of numbers to test, terminated by ``None``.
        outq: Queue receiving each prime found, followed by one ``None``.
    """
    # Compare against None explicitly: a falsy task such as 0 must be
    # processed like any other number instead of ending the worker.
    while (n := inq.get()) is not None:
        if is_prime(n):
            outq.put(n)
    outq.put(None)


if __name__ == "__main__":
    start_time = time.time()

    # Single source of truth for the worker count: the number of processes,
    # of input sentinels and of expected "done" markers must always agree,
    # otherwise the result loop below would block forever.
    num_workers = 4

    inq = multiprocessing.Queue()
    outq = multiprocessing.Queue()

    workers = [
        multiprocessing.Process(target=work, args=(inq, outq))
        for _ in range(num_workers)
    ]
    for w in workers:
        w.start()

    # One queue message per candidate number.
    for i in range(1, MAX_NUMBER):
        inq.put(i)

    # Finally push one None per worker so that each of them leaves its loop.
    for _ in workers:
        inq.put(None)

    # Each worker answers with one None when it is done; anything else is a
    # prime. Stop once every worker has reported completion.
    finish = 0
    while finish < num_workers:
        n = outq.get()
        if n is None:
            finish += 1
        else:
            primes.append(n)

    print(f"Took {time.time() - start_time} seconds")
    print(f"Got {len(primes)} primes")

    # Both queues are fully drained at this point, so joining cannot block
    # on unflushed queue data. Done after the timing print so the measured
    # interval stays the same as before.
    for w in workers:
        w.join()

    """
    Took 1.1287569999694824 seconds
    Got 9592 primes
    """

get 函数的执行时间远超过 is_prime 函数,时间都浪费在进程间通讯上。

v3 代码-等份切分数据

减少通讯的次数以及数据量:每次向输入队列放入一个元组,代表待检查区间的起止值 (start, end)。

import multiprocessing
from utils import is_prime
import time

# Same bound as the other versions. The recorded result for this version
# (9592 primes) corresponds to 100000, not 1000000.
MAX_NUMBER = 100000

# Primes gathered by the parent process from the workers' result lists.
primes = []

def work(inq, outq):
    """Find the primes in each ``(start, end)`` range received from ``inq``.

    For every range task the worker tests the integers in
    ``range(start, end)`` with ``is_prime`` and puts the list of primes found
    on ``outq`` as one message. A falsy item (the caller sends ``None``) ends
    the loop; the worker then puts ``None`` on ``outq`` and returns.
    """
    while True:
        task = inq.get()
        if not task:
            break
        lo, hi = task
        # One list per range keeps the number of queue messages small.
        found = [k for k in range(lo, hi) if is_prime(k)]
        outq.put(found)
    outq.put(None)


if __name__ == "__main__":
    start_time = time.time()

    # Single source of truth for the worker count: processes, range tasks,
    # input sentinels and expected "done" markers must all agree.
    num_workers = 4

    inq = multiprocessing.Queue()
    outq = multiprocessing.Queue()

    workers = [
        multiprocessing.Process(target=work, args=(inq, outq))
        for _ in range(num_workers)
    ]
    for w in workers:
        w.start()

    # Send one (start, end) range per worker instead of one message per
    # number, so only a handful of queue messages are exchanged.
    for i in range(num_workers):
        inq.put((i * MAX_NUMBER // num_workers, (i + 1) * MAX_NUMBER // num_workers))

    # Finally push one None per worker so that each of them leaves its loop.
    for _ in workers:
        inq.put(None)

    finish = 0
    while finish < num_workers:
        n = outq.get()
        # Test for the None sentinel explicitly: a range that contains no
        # primes yields an empty (falsy) list, which must not be mistaken
        # for a worker reporting completion.
        if n is None:
            finish += 1
        else:
            primes.extend(n)

    print(f"Took {time.time() - start_time} seconds")
    print(f"Got {len(primes)} primes")

    # Both queues are fully drained at this point, so joining cannot block
    # on unflushed queue data. Done after the timing print so the measured
    # interval stays the same as before.
    for w in workers:
        w.join()

    """
    Took 0.06907486915588379 seconds
    Got 9592 primes
    """

分配的任务不均匀,各个 worker 的运行时间不同。

v4代码 -使用pool map function

使用 multiprocessing.Pool 的 map 方法,由进程池负责切分和分发任务。

import multiprocessing
from utils import is_prime
import time

# Exclusive upper bound of the integers tested for primality.
MAX_NUMBER = 100_000
primes = []

start_time = time.time()


if __name__ == "__main__":
    with multiprocessing.Pool(4) as pool:
        # One is_prime result per candidate, in the same order as the input.
        flags = pool.map(is_prime, range(1, MAX_NUMBER))
        # Candidates start at 1, so number the results from 1 to recover
        # the value each flag belongs to.
        primes = [value for value, flag in enumerate(flags, start=1) if flag]

    elapsed = time.time() - start_time
    print(f"Took {elapsed} seconds")
    print(f"Got {len(primes)} primes")

    """
    Took 0.08679699897766113 seconds
    Got 9592 primes
    """

数据越大 区别越明显