|
|
|
import platform
|
|
|
|
import os
|
|
|
|
import fitz # pip install PyMuPDF
|
|
|
|
from PIL import Image
|
|
|
|
import shutil
|
|
|
|
import logging
|
|
|
|
|
|
|
|
|
|
|
|
# Convert a Word file to a PDF file (Windows, via Microsoft Word automation)
|
|
|
|
def word2pdf(word_file):
    """Convert a Word document to PDF through Microsoft Word COM automation.

    The PDF is written next to the source document, with the same base name
    and a ``.pdf`` extension.

    Args:
        word_file: Path of the Word document to convert.

    Returns:
        The path of the PDF file that was written.

    Raises:
        FileNotFoundError: If ``word_file`` does not exist.
        ImportError: If ``pywin32`` is not installed.
    """
    # Fail fast with a clear error instead of launching Word for nothing.
    if not os.path.isfile(word_file):
        raise FileNotFoundError(f'Word file not found: {word_file}')

    # Imported lazily so the module can still be loaded on non-Windows hosts.
    from win32com import client  # pip install pywin32

    # Only swap the final extension. A plain str.replace() on the extension
    # text would also rewrite matching text in directory names and picks the
    # wrong piece for names such as "report.final.docx".
    pdf_file = os.path.splitext(word_file)[0] + '.pdf'

    # Start (or attach to) the Word application object.
    word = client.Dispatch('Word.Application')
    try:
        # Open the file as a Word document object.
        doc_ = word.Documents.Open(word_file)
        try:
            # FileFormat=17 is Word's "save as PDF" format value.
            doc_.SaveAs(pdf_file, FileFormat=17)
        finally:
            # Always release the document, even when the export fails.
            doc_.Close()
    finally:
        # Always quit Word so no orphaned process is left behind.
        word.Quit()

    logging.info(f'{word_file} ----转pdf成功')
    return pdf_file
|
|
|
|
|
|
|
|
|
|
|
|
# Convert a Word file to a PDF file (Linux, via LibreOffice)
|
|
|
|
def word2pdf_linux(word_file):
    """Convert a Word document to PDF with headless LibreOffice.

    The PDF is written into the directory of the source document, with the
    same base name and a ``.pdf`` extension.

    Args:
        word_file: Path of the Word document to convert.

    Returns:
        The path of the PDF file that was written.

    Raises:
        FileNotFoundError: If ``word_file`` does not exist, if the
            ``libreoffice`` executable cannot be found, or if no PDF was
            produced.
        subprocess.CalledProcessError: If LibreOffice exits with a non-zero
            status.
    """
    import subprocess

    if not os.path.isfile(word_file):
        raise FileNotFoundError(f'Word file not found: {word_file}')

    # A bare filename has an empty dirname; fall back to the current directory
    # so "--outdir" always receives a real argument.
    out_dir = os.path.dirname(word_file) or '.'

    # Only swap the final extension (see os.path.splitext); replacing the
    # extension text anywhere in the path would corrupt directory names.
    pdf_file = os.path.splitext(word_file)[0] + '.pdf'

    # Pass an argument list (no shell) so paths containing spaces or shell
    # metacharacters are handled safely, and fail loudly on a bad exit status.
    subprocess.run(
        [
            'libreoffice',
            '--headless',
            '--language=zh-CN',
            '--convert-to',
            'pdf',
            word_file,
            '--outdir',
            out_dir,
        ],
        check=True,
    )

    # Do not report success unless the expected output is really there.
    if not os.path.isfile(pdf_file):
        raise FileNotFoundError(f'LibreOffice did not produce: {pdf_file}')

    logging.info(f'{word_file} ----转pdf成功')
    return pdf_file
|
|
|
|
|
|
|
|
|
|
|
|
# Render a PDF into a single long PNG image
|
|
|
|
def pdf2png(pdf_file):
    """Render every page of a PDF and stack the pages into one long PNG.

    Pages are rendered in order and pasted top to bottom, left-aligned, on a
    white canvas as wide as the widest page. The PNG is written next to the
    PDF, with the same base name and a ``.png`` extension.

    Args:
        pdf_file: Path of the PDF file to render.

    Returns:
        The path of the PNG file that was written.

    Raises:
        FileNotFoundError: If ``pdf_file`` does not exist.
        ValueError: If the PDF contains no pages.
        IOError: On other I/O failures; these are logged and re-raised.
    """
    # Zoom factor applied on both axes. Per the original sizing note, a
    # 792x612 page becomes 1056x816 at 1.33333333 (and 1584x1224 at 2).
    zoom = 1.33333333

    try:
        if not os.path.isfile(pdf_file):
            raise FileNotFoundError(f'PDF file not found: {pdf_file}')

        matrix = fitz.Matrix(zoom, zoom)
        pages = []

        pdf_doc = fitz.open(pdf_file)
        try:
            for page in pdf_doc:
                pix = page.get_pixmap(matrix=matrix, alpha=False)
                # Build the Pillow image straight from the pixel buffer. This
                # avoids a shared temporary folder next to the PDF, which
                # could collide between runs or be left behind on failure.
                pages.append(
                    Image.frombytes('RGB', (pix.width, pix.height), pix.samples)
                )
        finally:
            # Release the document handle even if rendering fails.
            pdf_doc.close()

        if not pages:
            raise ValueError(f'PDF has no pages: {pdf_file}')

        # Canvas is as wide as the widest page and as tall as all pages.
        width = max(img.width for img in pages)
        height = sum(img.height for img in pages)
        long_image = Image.new('RGB', (width, height), (255, 255, 255))

        # Paste each page below the previous one.
        y_offset = 0
        for img in pages:
            long_image.paste(img, (0, y_offset))
            y_offset += img.height

        # Only swap the final extension; replacing the extension text anywhere
        # in the path would corrupt directory or file names containing it.
        png_file = os.path.splitext(pdf_file)[0] + '.png'
        long_image.save(png_file)
    except IOError:
        logging.error('pdf转png失败')
        # Bare raise keeps the original traceback intact.
        raise
    else:
        logging.info("pdf转png成功")
        return png_file
|
|
|
|
|
|
|
|
|
|
|
|
def word_to_long_image(word_file_path):
    """Convert a Word document into a single long PNG image.

    The document is first converted to PDF (Microsoft Word on Windows,
    LibreOffice on every other platform), the PDF is rendered into one long
    PNG, and the intermediate PDF is deleted.

    Args:
        word_file_path: Path of the Word document; it is made absolute
            before conversion.

    Returns:
        The path of the PNG file that was written.

    Raises:
        Exception: Any error from the conversion steps is logged and
            re-raised unchanged.
    """
    try:
        _file = os.path.abspath(word_file_path)

        if platform.system().lower() == 'windows':
            pdf_file = word2pdf(_file)
        else:
            pdf_file = word2pdf_linux(_file)

        try:
            png_file = pdf2png(pdf_file)
        finally:
            # Remove the intermediate PDF whether or not rendering succeeded,
            # so a failed run does not leave stray files behind.
            if os.path.exists(pdf_file):
                os.remove(pdf_file)

        return png_file
    except Exception as error:
        logging.error('word转长图出错:{}'.format(error))
        # Bare raise keeps the original traceback intact.
        raise
|