generated from container/tmpl
			
				 6 changed files with 229 additions and 24 deletions
			
			
		| @ -0,0 +1,110 @@ | |||||
|  | import platform | ||||
|  | import os | ||||
|  | import fitz  # pip install PyMuPDF | ||||
|  | from PIL import Image | ||||
|  | import shutil | ||||
|  | import logging | ||||
|  | 
 | ||||
|  | 
 | ||||
def word2pdf(word_file):
    """Convert a Word document to PDF through Word's COM automation API.

    Only usable on Windows with Microsoft Word and ``pywin32`` installed;
    the COM client is imported lazily so the module still imports elsewhere.

    Args:
        word_file: Path of the Word document. NOTE(review): the caller in
            this file passes an absolute path; Word COM presumably needs
            one -- confirm before passing relative paths.

    Returns:
        The path of the generated PDF: ``word_file`` with its extension
        replaced by ``.pdf``.
    """
    from win32com import client  # pip install pywin32

    # Swap only the real extension. The previous str.replace() approach
    # rewrote every occurrence of the extension text anywhere in the path
    # and mis-handled names with several dots or with no extension.
    pdf_file = os.path.splitext(word_file)[0] + ".pdf"

    # Start (or attach to) the Word application.
    word = client.Dispatch('Word.Application')
    try:
        doc_ = word.Documents.Open(word_file)
        try:
            # FileFormat=17 makes Word save the document as PDF.
            doc_.SaveAs(pdf_file, FileFormat=17)
        finally:
            # Always release the document, even when saving fails.
            doc_.Close()
    finally:
        # Always quit Word so a failed conversion does not leave a
        # background WINWORD process running.
        word.Quit()

    logging.info(f'{word_file} ----转pdf成功')
    return pdf_file
|  | 
 | ||||
|  | 
 | ||||
def word2pdf_linux(word_file):
    """Convert a Word document to PDF with headless LibreOffice.

    The PDF is written next to the input file.

    Args:
        word_file: Path of the Word document to convert.

    Returns:
        The path of the generated PDF: ``word_file`` with its extension
        replaced by ``.pdf``.

    Raises:
        FileNotFoundError: If the ``libreoffice`` executable is not found.
        subprocess.CalledProcessError: If LibreOffice exits with a
            non-zero status.
    """
    import subprocess

    # dirname() is empty for a bare file name; fall back to the current
    # directory so --outdir always receives a value.
    out_dir = os.path.dirname(word_file) or "."

    # Pass an argument list (no shell) so paths containing spaces or shell
    # metacharacters are handled safely, and fail loudly instead of logging
    # success after a failed conversion.
    subprocess.run(
        [
            "libreoffice",
            "--headless",
            "--language=zh-CN",
            "--convert-to",
            "pdf",
            word_file,
            "--outdir",
            out_dir,
        ],
        check=True,
    )
    logging.info(f'{word_file} ----转pdf成功')

    # Swap only the real extension; str.replace() on the extension text
    # could also rewrite matching directory names.
    return os.path.splitext(word_file)[0] + ".pdf"
|  | 
 | ||||
|  | 
 | ||||
def pdf2png(pdf_file):
    """Render every page of a PDF and stack the pages into one long PNG.

    Pages are rendered with PyMuPDF, converted to PIL images in memory and
    pasted top to bottom, left-aligned, onto a white canvas whose width is
    that of the widest page.

    Args:
        pdf_file: Path of the PDF to render.

    Returns:
        The path of the written PNG: ``pdf_file`` with its extension
        replaced by ``.png``.

    Raises:
        IOError: Logged and re-raised when reading the PDF or writing the
            PNG fails with an I/O error. Other exceptions propagate
            unlogged, as before.
    """
    # Scale factor applied on both axes. Per the original author's note an
    # unscaled page renders at 792x612, 1.33333333 gives 1056x816 and 2
    # would give 1584x1224.
    zoom = 1.33333333

    try:
        matrix = fitz.Matrix(zoom, zoom)
        pages = []

        # The context manager closes the document even if rendering fails.
        with fitz.open(pdf_file) as pdf_doc:
            for page in pdf_doc:
                pix = page.get_pixmap(matrix=matrix, alpha=False)
                # Build the PIL image straight from the pixel buffer. This
                # avoids the fixed "tmp_pdf2png" scratch directory, which
                # was left behind on failure and could clash with (or
                # delete) an existing directory of the same name.
                pages.append(
                    Image.frombytes('RGB', (pix.width, pix.height), pix.samples)
                )

        # Canvas is as wide as the widest page and as tall as all pages.
        width = max((img.width for img in pages), default=0)
        height = sum(img.height for img in pages)
        long_image = Image.new('RGB', (width, height), (255, 255, 255))

        # Paste the pages one below the other.
        y_offset = 0
        for img in pages:
            long_image.paste(img, (0, y_offset))
            y_offset += img.height

        # Swap only the real extension; str.replace() on the extension text
        # could also rewrite matching directory names.
        png_file = os.path.splitext(pdf_file)[0] + ".png"
        long_image.save(png_file)
    except IOError:
        logging.error('pdf转png失败')
        raise
    else:
        logging.info("pdf转png成功")
        return png_file
|  | 
 | ||||
|  | 
 | ||||
def word_to_long_image(word_file_path):
    """Convert a Word document into a single long PNG image.

    The document is first converted to PDF (Word COM on Windows,
    LibreOffice otherwise), the PDF is rendered into one long PNG, and the
    intermediate PDF is deleted.

    Args:
        word_file_path: Path of the Word document; it is made absolute
            before conversion.

    Returns:
        The path of the generated PNG file.

    Raises:
        Exception: Any failure during conversion is logged and re-raised.
    """
    try:
        _file = os.path.abspath(word_file_path)

        if platform.system().lower() == 'windows':
            pdf_file = word2pdf(_file)
        else:
            pdf_file = word2pdf_linux(_file)

        try:
            png_file = pdf2png(pdf_file)
        finally:
            # Remove the intermediate PDF even when rendering fails. The
            # existence check keeps a missing file from masking the
            # original error.
            if os.path.exists(pdf_file):
                os.remove(pdf_file)

        return png_file
    except Exception as error:
        logging.error('word转长图出错:{}'.format(error))
        # Bare raise keeps the original traceback intact.
        raise
					Loading…
					
					
				
		Reference in new issue