generated from container/tmpl
Julin
2 years ago
6 changed files with 229 additions and 24 deletions
@ -0,0 +1,110 @@ |
|||||
|
import platform |
||||
|
import os |
||||
|
import fitz # pip install PyMuPDF |
||||
|
from PIL import Image |
||||
|
import shutil |
||||
|
import logging |
||||
|
|
||||
|
|
||||
|
# 将word文件转换成pdf文件 |
||||
|
def word2pdf(word_file):
    """Convert a Word document to PDF through Word's COM automation (Windows).

    Args:
        word_file: Path of the Word document to convert.

    Returns:
        Path of the PDF, i.e. ``word_file`` with its final extension
        replaced by ``.pdf``.

    Raises:
        ImportError: If ``pywin32`` is not installed.
        Exception: Whatever the COM layer raises when opening or saving fails.
    """
    from win32com import client  # pip install pywin32

    # Swap only the trailing extension.  A plain str.replace() on the extension
    # text would also rewrite matching directory names ("/doc/a.doc") and picks
    # the wrong token for names containing several dots ("a.b.docx").
    pdf_file = os.path.splitext(word_file)[0] + '.pdf'

    # Start (or attach to) the Word application object.
    word = client.Dispatch('Word.Application')
    try:
        # Open the file as a Document object.
        doc_ = word.Documents.Open(word_file)
        try:
            # FileFormat=17 is wdFormatPDF in Word's WdSaveFormat enumeration.
            doc_.SaveAs(pdf_file, FileFormat=17)
        finally:
            # Always release the document, even if saving failed.
            doc_.Close()
    finally:
        # Always quit Word so a failed conversion does not leave it running.
        word.Quit()

    logging.info(f'{word_file} ----转pdf成功')
    return pdf_file
||||
|
|
||||
|
|
||||
|
# 将word文件转换成pdf文件(Linux) |
||||
|
def word2pdf_linux(word_file):
    """Convert a Word document to PDF with headless LibreOffice.

    The PDF is written next to the source document.

    Args:
        word_file: Path of the Word document to convert.

    Returns:
        Path of the PDF, i.e. ``word_file`` with its final extension
        replaced by ``.pdf``.

    Raises:
        FileNotFoundError: If the ``libreoffice`` executable is not on PATH.
        subprocess.CalledProcessError: If LibreOffice exits with a non-zero
            status.
    """
    # Imported locally so this function does not depend on a file-level import.
    import subprocess

    # An empty dirname (bare file name) means "current directory".
    out_dir = os.path.dirname(word_file) or os.curdir

    # Pass an argument list instead of a shell string: paths containing spaces
    # or shell metacharacters are handed to LibreOffice verbatim.
    subprocess.run(
        [
            'libreoffice',
            '--headless',
            '--language=zh-CN',
            '--convert-to', 'pdf',
            word_file,
            '--outdir', out_dir,
        ],
        check=True,  # only report success when the converter actually succeeded
    )
    logging.info(f'{word_file} ----转pdf成功')

    # Swap only the trailing extension; replacing the extension text anywhere
    # in the path breaks on names such as "report.v2.docx" or "/doc/a.doc".
    return os.path.splitext(word_file)[0] + '.pdf'
||||
|
|
||||
|
|
||||
|
# pdf转图片 |
||||
|
def pdf2png(pdf_file):
    """Render every page of a PDF and stitch the pages into one tall PNG.

    Pages are stacked top to bottom, left-aligned, on a white canvas as wide
    as the widest page.  The PNG is written next to the PDF.

    Args:
        pdf_file: Path of the PDF to render.

    Returns:
        Path of the PNG, i.e. ``pdf_file`` with its final extension replaced
        by ``.png``.

    Raises:
        IOError: If reading the PDF or writing the PNG fails (logged, then
            re-raised).
        ValueError: If the PDF contains no pages.
    """
    # Zoom factor applied to both axes.  The source notes that 1.33333333
    # yields 1056x816 pixels for a 792x612 page (2 would give 1584x1224).
    zoom = 1.33333333
    matrix = fitz.Matrix(zoom, zoom)

    # Swap only the trailing extension; replacing the extension text anywhere
    # in the path would also rewrite matching directory names.
    png_file = os.path.splitext(pdf_file)[0] + '.png'

    try:
        page_images = []
        # The context manager closes the document even if rendering fails.
        with fitz.open(pdf_file) as pdf_doc:
            for page in pdf_doc:
                pix = page.get_pixmap(matrix=matrix, alpha=False)
                # Build the PIL image straight from the pixel buffer.  This
                # avoids a temporary directory of per-page PNG files that
                # could be left behind on failure or clash between runs.
                page_images.append(
                    Image.frombytes('RGB', (pix.width, pix.height), pix.samples)
                )

        if not page_images:
            raise ValueError('PDF has no pages to render')

        # Canvas size: widest page by the total height of all pages.
        width = max(img.width for img in page_images)
        height = sum(img.height for img in page_images)
        long_image = Image.new('RGB', (width, height), (255, 255, 255))

        # Paste each page below the previous one.
        y_offset = 0
        for img in page_images:
            long_image.paste(img, (0, y_offset))
            y_offset += img.height

        long_image.save(png_file)
    except IOError:
        logging.error('pdf转png失败')
        raise  # bare raise keeps the original traceback
    else:
        logging.info("pdf转png成功")
        return png_file
||||
|
|
||||
|
|
||||
|
def word_to_long_image(word_file_path):
    """Convert a Word document into a single long PNG image.

    The document is first converted to PDF (Word COM automation on Windows,
    LibreOffice elsewhere), the PDF is rendered to a stitched PNG, and the
    intermediate PDF is deleted.

    Args:
        word_file_path: Path of the Word document; relative paths are
            resolved against the current working directory.

    Returns:
        Path of the generated PNG file.

    Raises:
        Exception: Any failure from the conversion steps is logged and
            re-raised unchanged.
    """
    try:
        # Resolve to an absolute path before handing it to the converters.
        source = os.path.abspath(word_file_path)

        if platform.system().lower() == 'windows':
            pdf_file = word2pdf(source)
        else:
            pdf_file = word2pdf_linux(source)

        try:
            return pdf2png(pdf_file)
        finally:
            # Remove the intermediate PDF whether or not rendering succeeded,
            # so a failed run does not leave it behind.
            try:
                os.remove(pdf_file)
            except FileNotFoundError:
                # The converter never produced the file; nothing to clean up.
                pass
    except Exception as error:
        logging.error('word转长图出错:{}'.format(error))
        raise  # bare raise keeps the original traceback
Loading…
Reference in new issue