import logging
import os
import platform
import shutil
import subprocess

import fitz  # pip install PyMuPDF
from PIL import Image

# Value of Word's WdSaveFormat.wdFormatPDF, passed to Document.SaveAs.
_WORD_FORMAT_PDF = 17


def _swap_extension(path, new_ext):
    """Return *path* with its final extension replaced by *new_ext* (no dot).

    Only the trailing extension of the file name is touched, so directory
    names or earlier parts of the file name that happen to contain the old
    extension text are left alone. A path without an extension simply gets
    ``.new_ext`` appended.
    """
    root, _old_ext = os.path.splitext(path)
    return f"{root}.{new_ext}"


def word2pdf(word_file):
    """Convert a Word document to PDF through the Word COM API (Windows).

    Args:
        word_file: Path of the Word document to convert.

    Returns:
        Path of the PDF, written next to the source document with the same
        base name.

    Raises:
        Whatever pywin32 raises when Word cannot open or save the document.
    """
    from win32com import client  # pip install pywin32

    pdf_file = _swap_extension(word_file, "pdf")
    # Obtain the Word automation object.
    word = client.Dispatch('Word.Application')
    try:
        doc_ = word.Documents.Open(word_file)
        try:
            doc_.SaveAs(pdf_file, FileFormat=_WORD_FORMAT_PDF)
        finally:
            # Close the document even if saving failed.
            doc_.Close()
    finally:
        # Always quit Word, otherwise a failed conversion leaves a
        # background Word process running.
        word.Quit()
    logging.info(f'{word_file} ----转pdf成功')
    return pdf_file


def word2pdf_linux(word_file):
    """Convert a Word document to PDF with headless LibreOffice (non-Windows).

    Args:
        word_file: Path of the Word document to convert.

    Returns:
        Path of the PDF, written next to the source document with the same
        base name.

    Raises:
        FileNotFoundError: If the LibreOffice executable cannot be started,
            or if it exits successfully without producing the PDF.
        subprocess.CalledProcessError: If LibreOffice exits with a non-zero
            status.
    """
    # An empty dirname means "current directory"; pass that explicitly
    # instead of an empty --outdir value.
    out_dir = os.path.dirname(word_file) or os.curdir
    # Prefer `libreoffice`; fall back to `soffice` when only that launcher
    # is on PATH. If neither is found, subprocess reports the missing binary.
    binary = shutil.which("libreoffice") or shutil.which("soffice") or "libreoffice"
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and cannot be abused for command injection.
    subprocess.run(
        [
            binary,
            "--headless",
            "--language=zh-CN",
            "--convert-to",
            "pdf",
            word_file,
            "--outdir",
            out_dir,
        ],
        check=True,
    )
    pdf_file = _swap_extension(word_file, "pdf")
    # Do not trust the exit status alone: only report success when the
    # output file is really there.
    if not os.path.isfile(pdf_file):
        raise FileNotFoundError(f"LibreOffice did not produce {pdf_file}")
    logging.info(f'{word_file} ----转pdf成功')
    return pdf_file


def _render_pages(pdf_file, zoom):
    """Render every page of *pdf_file* to an in-memory RGB PIL image.

    The document is always closed before returning so that callers can
    delete the PDF afterwards.
    """
    matrix = fitz.Matrix(zoom, zoom)
    pdf_doc = fitz.open(pdf_file)
    try:
        pages = []
        for pg in range(pdf_doc.page_count):
            pix = pdf_doc[pg].get_pixmap(matrix=matrix, alpha=False)
            # Build the PIL image straight from the pixmap bytes; this
            # avoids writing and re-reading a temporary PNG per page.
            pages.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
        return pages
    finally:
        pdf_doc.close()


def pdf2png(pdf_file, zoom=1.33333333):
    """Render all pages of a PDF and stack them vertically into one PNG.

    Args:
        pdf_file: Path of the PDF to render.
        zoom: Scale factor applied to both axes when rasterising. Per the
            original notes, a 612x792 page becomes 816x1056 at the default
            1.33333333 and 1224x1584 at 2.

    Returns:
        Path of the PNG, written next to the PDF with the same base name.

    Raises:
        ValueError: If the PDF contains no pages.
        IOError: If reading the PDF or writing the PNG fails (logged, then
            re-raised).
    """
    png_file = _swap_extension(pdf_file, "png")
    try:
        pages = _render_pages(pdf_file, zoom)
        if not pages:
            raise ValueError(f"{pdf_file} contains no pages")

        # The canvas is as wide as the widest page and as tall as all
        # pages together; narrower pages keep a white margin on the right.
        width = max(page.width for page in pages)
        height = sum(page.height for page in pages)
        long_image = Image.new('RGB', (width, height), (255, 255, 255))

        y_offset = 0
        for page in pages:
            long_image.paste(page, (0, y_offset))
            y_offset += page.height

        long_image.save(png_file)
    except IOError:
        logging.error('pdf转png失败')
        raise
    logging.info("pdf转png成功")
    return png_file


def word_to_long_image(word_file_path):
    """Convert a Word document into a single tall PNG image.

    The document is first converted to PDF (Word COM on Windows,
    LibreOffice elsewhere), the PDF is rendered to one long PNG, and the
    intermediate PDF is removed.

    Args:
        word_file_path: Path of the Word document; may be relative.

    Returns:
        Path of the generated PNG.

    Raises:
        Exception: Any failure is logged and re-raised unchanged.
    """
    try:
        _file = os.path.abspath(word_file_path)
        if platform.system().lower() == 'windows':
            pdf_file = word2pdf(_file)
        else:
            pdf_file = word2pdf_linux(_file)
        try:
            return pdf2png(pdf_file)
        finally:
            # Remove the intermediate PDF whether or not rendering worked.
            if os.path.exists(pdf_file):
                os.remove(pdf_file)
    except Exception as error:
        logging.error('word转长图出错:{}'.format(error))
        raise