FS-PEP/code/pep-stats-report/app/image_helper.py

import platform
import os
import fitz  # pip install PyMuPDF
from PIL import Image
import shutil
import logging


def word2pdf(word_file):
    """Convert a Word document to PDF via Microsoft Word COM automation.

    Used on Windows only; requires ``pywin32`` and an installed Word.

    Args:
        word_file: Path of the Word document, handed as-is to
            ``Documents.Open``.

    Returns:
        Path of the generated PDF: ``word_file`` with its final extension
        replaced by ``.pdf``.
    """
    from win32com import client  # pip install pywin32

    # Swap only the trailing extension. A plain str.replace() on the
    # extension text would also rewrite matching directory names
    # (".../docx/a.docx") and picks the wrong token for dotted file names
    # ("my.report.docx").
    pdf_file = os.path.splitext(word_file)[0] + '.pdf'

    # Obtain the Word application automation object.
    word = client.Dispatch('Word.Application')
    try:
        # Open the file as a document object.
        doc_ = word.Documents.Open(word_file)
        try:
            # Save a copy as PDF (17 is Word's PDF save format).
            doc_.SaveAs(pdf_file, FileFormat=17)
        finally:
            # Always close the document, even when the export fails.
            doc_.Close()
    finally:
        # Always quit Word so a failed conversion does not leave a stray
        # Word process behind.
        word.Quit()

    logging.info(f'{word_file} ----转pdf成功')
    return pdf_file


def word2pdf_linux(word_file):
    """Convert a Word document to PDF with headless LibreOffice (non-Windows).

    The PDF is written next to the source document.

    Args:
        word_file: Path of the Word document to convert.

    Returns:
        Path of the generated PDF: ``word_file`` with its final extension
        replaced by ``.pdf``.

    Raises:
        subprocess.CalledProcessError: If LibreOffice exits with a non-zero
            status.
        FileNotFoundError: If the ``libreoffice`` executable is missing, or
            the expected PDF was not produced.
    """
    import subprocess

    # dirname() is empty for a bare file name; fall back to the current
    # directory so "--outdir" always receives a usable value.
    word_path = os.path.dirname(word_file) or '.'
    # Swap only the trailing extension (see os.path.splitext); replacing
    # the extension text anywhere in the path corrupts names such as
    # ".../docx/my.report.docx".
    pdf_file = os.path.splitext(word_file)[0] + '.pdf'

    # Argument list instead of a shell string: paths containing spaces or
    # shell metacharacters are passed through verbatim and cannot inject
    # commands. check=True surfaces a failed conversion instead of logging
    # success unconditionally.
    subprocess.run(
        [
            'libreoffice',
            '--headless',
            '--language=zh-CN',
            '--convert-to', 'pdf',
            word_file,
            '--outdir', word_path,
        ],
        check=True,
    )

    if not os.path.isfile(pdf_file):
        raise FileNotFoundError(
            f'LibreOffice did not produce the expected file: {pdf_file}'
        )

    logging.info(f'{word_file} ----转pdf成功')
    return pdf_file


def pdf2png(pdf_file):
    """Render every page of a PDF and stack the pages into one long PNG.

    Pages are rendered in order and pasted top to bottom, left-aligned, on a
    white canvas as wide as the widest page. The result is saved next to the
    PDF under the same name with a ``.png`` extension.

    Args:
        pdf_file: Path of the PDF to render.

    Returns:
        Path of the written PNG file.

    Raises:
        IOError: Logged and re-raised when reading or writing fails. Other
            exceptions propagate unchanged.
    """
    # A zoom of 4/3 per axis enlarges each page from the default render
    # size (1.33333333 --> 1056x816, 2 --> 1584x1224 for a 792x612 page).
    zoom = 1.33333333

    try:
        pdf_doc = fitz.open(pdf_file)
        try:
            mat = fitz.Matrix(zoom, zoom)
            page_images = []
            for pg in range(pdf_doc.page_count):
                pix = pdf_doc[pg].get_pixmap(matrix=mat, alpha=False)
                # Build the PIL image straight from the pixmap's RGB
                # samples. This avoids writing per-page PNGs into a shared,
                # fixed-name temp directory that concurrent calls would
                # clobber and that was left behind on failure.
                page_images.append(
                    Image.frombytes('RGB', (pix.width, pix.height), pix.samples)
                )
        finally:
            # Release the document handle whether or not rendering worked.
            pdf_doc.close()

        # Canvas size: widest page by the sum of all page heights.
        width = max((img.width for img in page_images), default=0)
        height = sum(img.height for img in page_images)

        # Blank white canvas for the long image.
        long_image = Image.new('RGB', (width, height), (255, 255, 255))

        # Paste each page below the previous one.
        y_offset = 0
        for img in page_images:
            long_image.paste(img, (0, y_offset))
            y_offset += img.height

        # Swap only the trailing extension; replacing the extension text
        # anywhere in the path corrupts names such as ".../pdf/a.b.pdf".
        png_file = os.path.splitext(pdf_file)[0] + '.png'
        long_image.save(png_file)
    except IOError:
        logging.error('pdf转png失败')
        raise

    logging.info("pdf转png成功")
    return png_file


def word_to_long_image(word_file_path):
    """Convert a Word document into a single long PNG image.

    The document is first converted to PDF (Word COM automation on Windows,
    LibreOffice elsewhere), the PDF is rendered into one stitched PNG, and
    the intermediate PDF is deleted.

    Args:
        word_file_path: Path of the Word document; made absolute before use.

    Returns:
        Path of the generated PNG file.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        _file = os.path.abspath(word_file_path)

        if platform.system().lower() == 'windows':
            pdf_file = word2pdf(_file)
        else:
            pdf_file = word2pdf_linux(_file)

        try:
            return pdf2png(pdf_file)
        finally:
            # Remove the intermediate PDF even when rendering fails, so a
            # failed run does not leave it behind. The existence check
            # keeps a missing PDF from masking the original error.
            if os.path.exists(pdf_file):
                os.remove(pdf_file)
    except Exception as error:
        logging.error('word转长图出错：{}'.format(error))
        raise
append 1 year ago			`import platform`
			`import os`
			`import fitz # pip install PyMuPDF`
			`from PIL import Image`
			`import shutil`
			`import logging`


			`# 将word文件转换成pdf文件`
			`def word2pdf(word_file):`
			`from win32com import client # pip install pywin32`

			`# 获取word格式处理对象`
			`word = client.Dispatch('Word.Application')`
			`# 以Doc对象打开文件`
			`doc_ = word.Documents.Open(word_file)`
			`# 另存为pdf文件`
			`pdf_file = word_file.replace(os.path.basename(word_file).split('.')[1], "pdf")`
			`doc_.SaveAs(pdf_file, FileFormat=17)`
			`logging.info(f'{word_file} ----转pdf成功')`
			`# 关闭doc对象`
			`doc_.Close()`
			`# 退出word对象`
			`word.Quit()`
			`return pdf_file`


			`# 将word文件转换成pdf文件（Linux）`
			`def word2pdf_linux(word_file):`
			`word_path = os.path.dirname(word_file)`
			`os.system(f"libreoffice --headless --language=zh-CN --convert-to pdf {word_file} --outdir {word_path}")`
			`logging.info(f'{word_file} ----转pdf成功')`
			`pdf_file = word_file.replace(os.path.basename(word_file).split('.')[1], "pdf")`
			`return pdf_file`


			`# pdf转图片`
			`def pdf2png(pdf_file):`
			`image_path = os.path.abspath(f'{os.path.dirname(pdf_file)}/tmp_pdf2png')`
			`try:`
			`# 创建一个空白图片，用于拼接内容`
			`width, height = 0, 0`
			`images = []`

			`pdf_doc = fitz.open(pdf_file)`
			`for pg in range(pdf_doc.page_count):`
			`page = pdf_doc[pg]`
			`rotate = int(0)`
			`# 每个尺寸的缩放系数为1.3，这将为我们生成分辨率提高2.6的图像。`
			`# 此处若是不做设置，默认图片大小为：792X612, dpi=96`
			`zoom_x = 1.33333333 # (1.33333333-->1056x816) (2-->1584x1224)`
			`zoom_y = 1.33333333`
			`mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)`
			`pix = page.get_pixmap(matrix=mat, alpha=False)`

			`if not os.path.exists(image_path): # 判断存放图片的文件夹是否存在`
			`os.makedirs(image_path) # 若图片文件夹不存在就创建`
			`pix.save(image_path + '/' + 'tmp%s.png' % pg) # 将图片写入指定的文件夹内`

			`img = Image.open(image_path + '/' + 'tmp%s.png' % pg)`
			`img_width, img_height = img.size`

			`# 更新拼接图片的宽度和高度`
			`width = max(width, img_width)`
			`height += img_height`

			`# 添加图片到拼接列表`
			`images.append(img)`

			`# 创建一个空白长图`
			`long_image = Image.new('RGB', (width, height), (255, 255, 255))`
			`y_offset = 0`

			`# 将每张图片拼接到长图中`
			`for img in images:`
			`long_image.paste(img, (0, y_offset))`
			`y_offset += img.height`

			`# 保存拼接后的长图`
			`png_file = pdf_file.replace(os.path.basename(pdf_file).split('.')[1], "png")`
			`long_image.save(png_file)`

			`# 删除中间临时保存的图片`
			`shutil.rmtree(image_path)`
			`except IOError as error:`
			`logging.error('pdf转png失败')`
			`raise error`
			`else:`
			`logging.info("pdf转png成功")`
			`return png_file`


			`def word_to_long_image(word_file_path):`
			`try:`
			`_file = os.path.abspath(word_file_path) # os.path.abspath('input.docx')`

			`if platform.system().lower() == 'windows':`
			`pdf_file = word2pdf(_file)`
			`else:`
			`pdf_file = word2pdf_linux(_file)`

			`png_file = pdf2png(pdf_file)`

			`# 删除中间保存的pdf文件`
			`os.remove(pdf_file)`

			`return png_file`
			`except Exception as error:`
			`logging.error('word转长图出错：{}'.format(error))`
			`raise error`