{"id":263362,"date":"2023-03-08T09:48:34","date_gmt":"2023-03-08T01:48:34","guid":{"rendered":"https:\/\/lrxjmw.cn\/?p=263362"},"modified":"2023-02-03T23:47:04","modified_gmt":"2023-02-03T15:47:04","slug":"python-word-pdf","status":"publish","type":"post","link":"https:\/\/lrxjmw.cn\/python-word-pdf.html","title":{"rendered":"\u7528python\u5c06word\u6587\u6863\u548cpdf\u7535\u5b50\u4e66\u8fdb\u884c\u683c\u5f0f\u4e92\u8f6c"},"content":{"rendered":"\n\n\n
\u5bfc\u8bfb<\/td>\n\u4e00\u4e9b\u91cd\u8981\u6587\u6863\u683c\u5f0f\u4e4b\u95f4\u7684\u4e92\u8f6c\u5728\u76ee\u524d\u663e\u5f97\u5c24\u4e3a\u91cd\u8981\uff0cpdf\u4f5c\u4e3a\u901a\u7528\u683c\u5f0f\u5728\u73b0\u5728\u5404\u4e2a\u5e73\u53f0\u4e0a\u517c\u5bb9\u6027\u662f\u6700\u597d\u7684\uff0c\u6240\u4ee5\u5199python\u811a\u672c\u5c06\u8fd9\u4e9bword\u6587\u6863\u6279\u91cf\u8f6c\u6362pdf\u662f\u6700\u597d\u7684\u89e3\u51b3\u65b9\u6848\u3002<\/strong><\/td>\n<\/tr>\n<\/tbody>\n<\/table>\n

\u7531\u4e8ewindows\u7cfb\u7edf\u5bf9\u4e8eword\u6587\u6863\u6709\u5929\u7136\u7684\u517c\u5bb9\u6027\u4f18\u52bf\uff0c\u6240\u4ee5\u8f6c\u6362\u8d77\u6765\u5f88\u7b80\u5355\uff0c\u666e\u904d\u4e0a\u662f\u901a\u8fc7comtypes\u6a21\u5757\u3002<\/p>\n

pip3 install comtypes<\/pre>\n
from comtypes.client import CreateObject\r\nimport os\r\n\r\ndef wd_to_pdf(folder):\r\n        #\u83b7\u53d6\u6307\u5b9a\u76ee\u5f55\u4e0b\u9762\u7684\u6240\u6709\u6587\u4ef6\r\n        files = os.listdir(folder)\r\n        #\u83b7\u53d6word\u7c7b\u578b\u7684\u6587\u4ef6\u653e\u5230\u4e00\u4e2a\u5217\u8868\u91cc\u9762\r\n        wdfiles = [f for f in files if f.endswith((\".doc\", \".docx\"))]\r\n        for wdfile in wdfiles:\r\n            #\u5c06word\u6587\u4ef6\u653e\u5230\u6307\u5b9a\u7684\u8def\u5f84\u4e0b\u9762\r\n            wdPath = os.path.join(folder, wdfile)\r\n            #\u8bbe\u7f6e\u5c06\u8981\u5b58\u653epdf\u6587\u4ef6\u7684\u8def\u5f84\r\n            pdfPath = wdPath\r\n            #\u5224\u65ad\u662f\u5426\u5df2\u7ecf\u5b58\u5728\u5bf9\u5e94\u7684pdf\u6587\u4ef6\uff0c\u5982\u679c\u4e0d\u5b58\u5728\u5c31\u52a0\u5165\u5230\u5b58\u653epdf\u7684\u8def\u5f84\u5185\r\n            if pdfPath[-3:] != 'pdf':\r\n                pdfPath = pdfPath + \".pdf\"\r\n            #\u5c06word\u6587\u6863\u8f6c\u5316\u4e3apdf\u6587\u4ef6\uff0c\u5148\u6253\u5f00word\u6240\u5728\u8def\u5f84\u6587\u4ef6\uff0c\u7136\u540e\u5728\u5904\u7406\u540e\u4fdd\u5b58pdf\u6587\u4ef6\uff0c\u6700\u540e\u5173\u95ed\r\n            pdfCreate = self.wdToPDF.Documents.Open(wdPath)\r\n            pdfCreate.SaveAs(pdfPath, self.wdFormatPDF)<\/pre>\n

\u5176\u5b9e\u96be\u70b9\u8fd8\u662f\u5728Linux\u7cfb\u7edf\u4e0b\u5982\u4f55\u8f6c\u6362\uff0c\u56e0\u4e3acomtypes\u4f9d\u8d56\u7684win32com\u6a21\u5757\u5728linux\u4e0b\u662f\u65e0\u6cd5\u4f7f\u7528\u7684\uff0c\u6240\u4ee5\u5728linux\u4e0b\u9762\u63a8\u8350\u53e6\u5916\u4e00\u5957\u89e3\u51b3\u65b9\u6848\u4e5f\u5c31\u662fLibreOffice\uff0cLibreOffice \u80fd\u591f\u4e0e Microsoft Office \u7cfb\u5217\u4ee5\u53ca\u5176\u5b83\u5f00\u6e90\u529e\u516c\u8f6f\u4ef6\u6df1\u5ea6\u517c\u5bb9\uff0c\u4e14\u652f\u6301\u7684\u6587\u6863\u683c\u5f0f\u76f8\u5f53\u5168\u9762\u3002 <\/p>\n

\u9996\u5148\u5378\u8f7d\u5f53\u524d\u7cfb\u7edf\u7684libreoffice\uff0c\u56e0\u4e3a\u5927\u591a\u6570\u7cfb\u7edf\u9ed8\u8ba4\u5b89\u88c5\u7684\u90fd\u662f\u4f4e\u7248\u672c\uff0c\u6211\u4eec\u8981\u4f7f\u7528\u7684\u662f\u6700\u65b0\u7a33\u5b9a\u7248<\/p>\n

yum remove libreoffice-*<\/pre>\n

\u5728https:\/\/www.libreoffice.org\/download\/download\/\u4e0a\u4e0b\u8f7d\u6700\u65b0\u7684\u7a33\u5b9a\u7248gz\u538b\u7f29\u5305 <\/p>\n

\u5b89\u88c5java\u4f9d\u8d56 <\/p>\n

yum -y install java-1.8.0-openjdk*<\/pre>\n

\u7136\u540e\u5c06\u521a\u624d\u4e0b\u8f7d\u7684gz\u538b\u7f29\u5305\u89e3\u538b\u540e\u5b89\u88c5 <\/p>\n

tar xvf LibreOffice_6.2.5.2_Linux_x86-64_rpm.tar.gz\r\n \r\ncd LibreOffice_6.2.5.2_Linux_x86-64_rpm\/RPMS\/\r\n \r\nyum localinstall *.rpm<\/pre>\n

\u6700\u540e\u5b89\u88c5\u4e00\u4e9b\u4f9d\u8d56<\/p>\n

yum install cairo cups-libs libSM\r\nyum install ibus\r\nyum install libreoffice-headless<\/pre>\n

\u5728\u547d\u4ee4\u884c\u8f93\u5165 <\/p>\n

libreoffice -help<\/pre>\n

\u4f1a\u663e\u793a\u5e2e\u52a9\u6587\u6863\u5c31\u6ca1\u95ee\u9898\u4e86 <\/p>\n

\"\"<\/p>\n

\u89e3\u51b3\u4e2d\u6587\u4e71\u7801\u95ee\u9898<\/p>\n

\u5b89\u88c5windows\u5b57\u4f53\uff08\u4e0d\u88c5\uff0c\u4f1a\u6709\u4e71\u7801\uff09\u5c06windows\u7684\u5b57\u4f53\u590d\u5236\u5230linux\u4e0aC:\\Windows\\Fonts\\* windows\u4e0a\u6240\u6709\u7684\u5b57\u4f53\uff08\u5c1d\u8bd5\u8fc7\u53ea\u590d\u5236\u90e8\u5206\uff0c\u8fd8\u662f\u6709\u4e71\u7801\uff0c\u5168\u90e8\u5b57\u4f53\u5c31\u4e0d\u4f1a\u4e71\u7801\u4e86\uff09\uff0c\u4e0a\u4f20\u5230linux\u7684\/usr\/share\/fonts\/chinese('chinese'\u76ee\u5f55\u662f\u6211\u81ea\u5df1\u5efa\u7684\uff0cmkdir chinese) <\/p>\n

chmod -R 755 \/usr\/share\/fonts\/chinese    \/\/ \u4fee\u6539\u6743\u9650\r\n\r\nfc-cache -fv        \/\/ \u5efa\u7acb\u5b57\u4f53\u7f13\u5b58\r\n\r\nfc-list | grep chinese        \/\/ \u53ef\u4ee5\u67e5\u770b\u5230\u5df2\u5b89\u88c5\u65b0\u589e\u7684\u5b57\u4f53\u4e86<\/pre>\n

\u5982\u679c\u4f60\u5acc\u9ebb\u70e6\uff0c\u4e5f\u53ef\u4ee5\u4fee\u6539\u7cfb\u7edf\u8bed\u8a00\u6765\u652f\u6301\u4e2d\u6587 <\/p>\n

\u6267\u884c\u547d\u4ee4\uff1a<\/p>\n

yum groupinstall \"fonts\"<\/pre>\n

\u5b89\u88c5\u6210\u529f\u540e,
\n\u6253\u5f00<\/p>\n

vim \/etc\/locale.conf<\/pre>\n

\u6309\u952e i \u8fdb\u5165\u7f16\u8f91\u6a21\u5f0f, \u628a\u5185\u5bb9\u6539\u4e3a<\/p>\n

LANG=\"zh_CN.UTF-8\"<\/pre>\n

wq \u5b58\u76d8
\n\u7136\u540e\u91cd\u542f\u670d\u52a1\u5668reboot
\n\u4e4b\u540e\u4e5f\u53ef\u4ee5\u652f\u6301\u4e2d\u6587\u8f6c\u6362\u4e86
\n\u8f6c\u6362\u547d\u4ee4 <\/p>\n

libreoffice6.2 --headless --convert-to pdf \/root\/4321.docx<\/pre>\n

\"\"<\/p>\n

\u6b64\u65f6\uff0c\u6211\u4eec\u8981\u6539\u9020\u4e00\u4e0b\u8f6c\u6362\u811a\u672c\uff0c\u505a\u5230\u53ef\u4ee5\u517c\u5bb9windows\u548cLinux\u53cc\u7cfb\u7edf\uff0c\u4efb\u610f\u7cfb\u7edf\u4e0b\u90fd\u53ef\u4ee5\u8c03\u7528\u811a\u672c\u8fdb\u884c\u8f6c\u6362 <\/p>\n

import subprocess\r\nimport os\r\ntry:\r\n    from comtypes import client\r\nexcept ImportError:\r\n    client = None\r\n\r\ndef doc2pdf(doc):\r\n    \"\"\"\r\n    convert a doc\/docx document to pdf format\r\n    :param doc: path to document\r\n    \"\"\"\r\n    doc = os.path.abspath(doc) # bugfix - searching files in windows\/system32\r\n    if client is None:\r\n        return doc2pdf_linux(doc)\r\n    name, ext = os.path.splitext(doc)\r\n    try:\r\n        word = client.DispatchEx(\"Word.Application\")\r\n        worddoc = word.Documents.Open(doc)\r\n        worddoc.SaveAs(name + '.pdf', FileFormat=17)\r\n    except Exception:\r\n        raise\r\n    finally:\r\n        worddoc.Close()\r\n        word.Quit()\r\n\r\n\r\ndef doc2pdf_linux(doc):\r\n    \"\"\"\r\n    convert a doc\/docx document to pdf format (linux only, requires libreoffice)\r\n    :param doc: path to document\r\n    \"\"\"\r\n    cmd = 'libreoffice6.2 --headless --convert-to pdf'.split() + [doc]\r\n    p = subprocess.Popen(cmd, stderr=subprocess.PIPE, stdout=subprocess.PIPE)\r\n    p.wait(timeout=10)\r\n    stdout, stderr = p.communicate()\r\n    if stderr:\r\n        raise subprocess.SubprocessError(stderr)<\/pre>\n

\u7b80\u76f4\u5b8c\u7f8e\uff0c\u53ef\u4ee5\u6536\u5de5\u4e86 <\/p>\n","protected":false},"excerpt":{"rendered":"

\u7531\u4e8ewindows\u7cfb\u7edf\u5bf9\u4e8eword\u6587\u6863\u6709\u5929\u7136\u7684\u517c\u5bb9\u6027\u4f18\u52bf\uff0c\u6240\u4ee5\u8f6c\u6362\u8d77\u6765\u5f88\u7b80\u5355\uff0c\u666e\u904d\u4e0a\u662f\u901a\u8fc7comtypes\u6a21\u5757 […]<\/p>\n","protected":false},"author":310,"featured_media":263372,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_acf_changed":false,"footnotes":""},"categories":[55],"tags":[],"class_list":["post-263362","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-thread"],"acf":[],"_links":{"self":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts\/263362","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/users\/310"}],"replies":[{"embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/comments?post=263362"}],"version-history":[{"count":7,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts\/263362\/revisions"}],"predecessor-version":[{"id":263371,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/posts\/263362\/revisions\/263371"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/media\/263372"}],"wp:attachment":[{"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/media?parent=263362"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/categories?post=263362"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/lrxjmw.cn\/wp-json\/wp\/v2\/tags?post=263362"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}