欧美久久久精品,欧美日韩精品,欧美视频三区

本文實例講述了Python實現批量將word轉html并將html內容發布至網站的方法。分享給大家供大家參考。具體實現方法如下：

				?

									#coding=utf-8

									__author__ = 'zhm'

									from win32com import client as wc

									import os

									import time

									import random

									import MySQLdb

									import re

									def wordsToHtml(dir):
									    """Convert every .doc/.docx file under *dir* to an HTML file beside it.

									    Walks *dir* recursively, opens each Word document through the WPS COM
									    automation object and saves it as ``<name>.html`` in the directory the
									    document lives in.

									    This is Python 2 code: the names produced by ``os.walk`` are byte
									    strings and are decoded as GBK before being used to build the output
									    path.

									    Args:
									        dir: Root directory to scan (byte-string path).

									    Raises:
									        Whatever the COM layer raises when a document cannot be opened or
									        saved. The document and the WPS application are still closed.
									    """
									    # Kingsoft WPS automation: preview builds use 'KWPS', release
									    # builds use 'WPS'.
									    word = wc.Dispatch('KWPS.Application')
									    try:
									        for path, subdirs, files in os.walk(dir):
									            for wordFile in files:
									                wordFile2 = unicode(wordFile, "gbk")
									                dotIndex = wordFile2.rfind(".")
									                if dotIndex == -1:
									                    print('********************ERROR: 未取得后綴名！')
									                    continue
									                # Decide from the suffix *before* touching WPS, so that
									                # unrelated files (images, previously generated html)
									                # are never opened and left open.
									                fileSuffix = wordFile2[dotIndex + 1:].lower()
									                if fileSuffix not in ("doc", "docx"):
									                    continue
									                wordFullName = os.path.join(path, wordFile)
									                htmlName = wordFile2[:dotIndex] + ".html"
									                htmlFullName = os.path.join(unicode(path, "gbk"), htmlName)
									                doc = word.Documents.Open(wordFullName)
									                try:
									                    # 8 selects HTML output (wdFormatHTML in the Word
									                    # object model; presumably WPS uses the same value).
									                    doc.SaveAs(htmlFullName, 8)
									                    print(u'生成了html文件：' + htmlFullName)
									                finally:
									                    # Close even when SaveAs fails so documents do not
									                    # pile up inside the WPS process.
									                    doc.Close()
									    finally:
									        word.Quit()
									    print("")
									    print("Finished!")

									# Captures the value of the src attribute of every <img> tag.
									_IMG_SRC_RE = re.compile(r"""<img\s.*?\s?src\s*=\s*['|"]?([^\s'"]+).*?>""", re.I)


									def _new_target_path(targetDir, suffix):
									    """Return an unused path in *targetDir* named <epoch ms><random int><suffix>.

									    Creates *targetDir* when it does not exist yet.
									    """
									    if not os.path.exists(targetDir):
									        os.makedirs(targetDir)
									    while True:
									        stamp = str(int(time.time() * 1000)) + str(random.randint(100, 10000))
									        candidate = os.path.join(targetDir, stamp + suffix)
									        # Retry on a collision instead of overwriting an earlier upload.
									        if not os.path.exists(candidate):
									            return candidate


									def _publish_images(htmlContent, sourceDir, imgTarget):
									    """Copy the images referenced by *htmlContent* into *imgTarget*.

									    *htmlContent* is the raw byte string of the page and *sourceDir* the
									    (unicode) directory the page was read from. Every image that exists on
									    disk is copied and its src is replaced by the drive-less path of the
									    copy. Returns the rewritten content.
									    """
									    handled = set()
									    for tagContent in _IMG_SRC_RE.findall(htmlContent):
									        # The same src may occur several times; one copy serves them all
									        # because replace() rewrites every occurrence.
									        if tagContent in handled:
									            continue
									        handled.add(tagContent)
									        imgSrcFullName = os.path.join(sourceDir, unicode(tagContent, "gbk"))
									        if not os.path.isfile(imgSrcFullName):
									            # Remote URLs or missing files: leave the tag untouched
									            # rather than aborting the whole import.
									            print(u'********************ERROR: image not found: ' + imgSrcFullName)
									            continue
									        # NOTE: the copy is always named *.png, whatever the source type.
									        targetImgFile = _new_target_path(imgTarget, '.png')
									        with open(imgSrcFullName, 'rb') as srcFile:
									            imgData = srcFile.read()
									        with open(targetImgFile, "wb") as dstFile:
									            dstFile.write(imgData)
									        # Strip the drive letter to obtain the path used in the page.
									        htmlContent = htmlContent.replace(tagContent, targetImgFile.split(":")[1])
									    return htmlContent


									def _iframe_wrapper(pageUrl):
									    """Return the article body: an iframe showing *pageUrl*.

									    The embedded script sets the iframe height to the scroll height of
									    the framed document when it has loaded.
									    """
									    return (
									        '\n'
									        '<script type="text/javascript" language="javascript">\n'
									        ' function iFrameHeight() {\n'
									        '  var ifm= document.getElementById("iframepage");\n'
									        '  var subWeb = document.frames ? document.frames["iframepage"].document:ifm.contentDocument;\n'
									        '  if(ifm != null && subWeb != null) {\n'
									        '   ifm.height = subWeb.body.scrollHeight;\n'
									        '  }\n'
									        ' }\n'
									        '</script>\n'
									        '<iframe src=' + pageUrl + '\n'
									        ' marginheight="0" marginwidth="0" frameborder="0" scrolling="no" width="765" height=100% id="iframepage" name="iframepage" onLoad="iFrameHeight()" ></iframe>\n'
									    )


									def html_add_to_db(dir):
									    """Publish every .htm/.html file under *dir* and register it in MySQL.

									    For each HTML file found while walking *dir*:

									    1. the images it references are copied to ``D:/files/images/whzx/``
									       and the page is rewritten to point at the copies;
									    2. the rewritten page is stored under ``D:/files/htmls/`` with a
									       generated name;
									    3. a row is inserted into ``common_article`` whose title is the file
									       name without suffix and whose content is an iframe snippet that
									       loads the stored page.

									    ``D:/files`` is the static directory configured on the web server,
									    which is why the drive letter is stripped from stored paths.

									    This is Python 2 code: names from ``os.walk`` are byte strings and are
									    decoded as GBK.

									    Args:
									        dir: Root directory to scan (byte-string path).

									    Raises:
									        IOError/OSError: when a page cannot be read or a target cannot be
									            written. Nothing is committed in that case.
									    """
									    targetDir = 'D:/files/htmls/'
									    imgTarget = 'D:/files/images/whzx/'
									    sql = "insert into common_article(title,content) values(%s,%s)"

									    # NOTE(review): connection settings and credentials are hard-coded.
									    conn = MySQLdb.connect(
									        host='localhost',
									        port=3306,
									        user='root',
									        passwd='root',
									        db='test',
									        charset='utf8'
									    )
									    try:
									        cur = conn.cursor()
									        try:
									            for path, subdirs, files in os.walk(dir):
									                # Decode once so every join below is unicode + unicode;
									                # mixing byte and unicode paths triggers an implicit
									                # ASCII decode that fails on non-ASCII directories.
									                pathText = unicode(path, "gbk")
									                for htmlFile in files:
									                    htmlFile2 = unicode(htmlFile, "gbk")
									                    dotIndex = htmlFile2.rfind(".")
									                    if dotIndex == -1:
									                        print('********************ERROR: 未取得后綴名！')
									                        continue
									                    fileSuffix = htmlFile2[dotIndex + 1:].lower()
									                    if fileSuffix not in ("htm", "html"):
									                        continue

									                    htmlFullName = os.path.join(pathText, htmlFile2)
									                    with open(htmlFullName, 'rb') as htFile:
									                        htmStrCotent = htFile.read()
									                    htmStrCotent = _publish_images(htmStrCotent, pathText, imgTarget)

									                    targetFile = _new_target_path(targetDir, '.html')
									                    with open(targetFile, "wb") as tmpTargetFile:
									                        tmpTargetFile.write(htmStrCotent)

									                    title = os.path.splitext(htmlFile2)[0]
									                    iframeHtml = _iframe_wrapper(targetFile.split(':')[1])
									                    try:
									                        cur.execute(sql, (title, iframeHtml))
									                    except MySQLdb.Error as e:
									                        # Best effort: report the failed row and carry on
									                        # with the remaining files.
									                        print("Error: unable to insert data")
									                        print(e)
									        finally:
									            cur.close()
									        conn.commit()
									    finally:
									        # Close the database connection even when the import fails.
									        conn.close()

									if __name__ == '__main__':
									    # Convert the Word documents first, then publish the HTML that the
									    # conversion left in the same directory tree.
									    source_dir = 'd:/word'
									    wordsToHtml(source_dir)
									    html_add_to_db(source_dir)