python-docx的简单使用
def 设置表格网格线为黑色实线(table_object:object):
    """Give a python-docx table a black, single-line, 0.5 pt grid.

    The outer edges and the inner horizontal/vertical rules are all set.
    Any ``w:tblBorders`` element already present on the table is replaced,
    so calling this function more than once does not stack duplicates.

    Args:
        table_object: A python-docx table (the object returned by
            ``Document.add_table``).

    Returns:
        The same ``table_object``; using the return value is optional.
    """
    qn = docx.oxml.ns.qn
    # w:sz is expressed in eighths of a point, so 4 means 0.5 pt.
    # w:color is a bare RRGGBB hex value; it must not carry a leading '#'.
    edge_attributes = {"sz": 4, "val": "single", "color": "000000"}
    table_properties = table_object._tbl.tblPr

    # Drop borders left by an earlier call; only one w:tblBorders is allowed.
    for stale in table_properties.findall(qn('w:tblBorders')):
        table_properties.remove(stale)

    borders = docx.oxml.OxmlElement('w:tblBorders')
    # Children are emitted in the order the WordprocessingML schema defines.
    for tag in ('top', 'left', 'bottom', 'right', 'insideH', 'insideV'):
        edge = docx.oxml.OxmlElement(f'w:{tag}')
        for key, value in edge_attributes.items():
            edge.set(qn(f'w:{key}'), str(value))
        borders.append(edge)

    # w:tblPr is an ordered sequence: w:tblBorders has to come before any of
    # these siblings, so insert in front of the first one that exists.
    for tag in ('w:shd', 'w:tblLayout', 'w:tblCellMar', 'w:tblLook',
                'w:tblCaption', 'w:tblDescription', 'w:tblPrChange'):
        successor = table_properties.find(qn(tag))
        if successor is not None:
            successor.addprevious(borders)
            break
    else:
        table_properties.append(borders)
    return table_object
def 设置标题样式为黑色宋体(heading_object:object):
    """Left-align a heading and render its runs in black 宋体 (SimSun).

    Args:
        heading_object: A python-docx paragraph, e.g. the value returned by
            ``Document.add_heading``.

    Returns:
        The same ``heading_object``; using the return value is optional.
    """
    heading_object.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.LEFT
    east_asia_attribute = docx.oxml.ns.qn('w:eastAsia')
    black = docx.shared.RGBColor(0, 0, 0)
    for run in heading_object.runs:
        run.font.name = u'宋体'
        # font.name alone does not change the face used for CJK characters;
        # the East Asian face must be set on w:rFonts as well. The assignment
        # above guarantees that rPr/rFonts already exist on the run.
        run._element.rPr.rFonts.set(east_asia_attribute, u'宋体')
        run.font.color.rgb = black
    return heading_object
def word(保存路径=r"C:\Users\gyj\Downloads\temp.docx"):
    """Write the module-level ``翻译结果`` dictionary to a .docx file.

    Layout: the cmdlet name as a level-2 heading, then "描述" (synopsis and
    description, one paragraph per line), "参数" (a two-column table of
    parameter name / parameter text) and "示例" (one level-4 heading per
    example followed by its lines as paragraphs).

    Args:
        保存路径: Where the document is saved. Defaults to the path that was
            previously hard-coded.
    """
    left = docx.enum.text.WD_PARAGRAPH_ALIGNMENT.LEFT
    my_word_doc = docx.Document()  # start from an empty document

    def add_styled_heading(text, level):
        # Every heading gets the same black 宋体, left-aligned styling.
        设置标题样式为黑色宋体(my_word_doc.add_heading(text, level=level))

    add_styled_heading(翻译结果["NAME"], 2)

    add_styled_heading("描述", 3)
    for section in ("SYNOPSIS", "DESCRIPTION"):
        for line in 翻译结果[section].split("\n"):
            my_word_doc.add_paragraph(line)

    add_styled_heading("参数", 3)
    parameters = 翻译结果["PARAMETERS"]
    table = my_word_doc.add_table(rows=len(parameters), cols=2)
    设置表格网格线为黑色实线(table)
    for row_index, (name, description) in enumerate(parameters.items()):
        for column, text in enumerate((name, description)):
            # Look each cell up once and assign its text once. As before,
            # the lines of a value are concatenated without any separator.
            cell = table.cell(row_index, column)
            cell.text = text.replace("\n", "")
            for paragraph in cell.paragraphs:
                paragraph.alignment = left

    add_styled_heading("示例", 3)
    for title, body in 翻译结果["Example"].items():
        # Example titles carry a trailing newline; drop that last character.
        add_styled_heading(title[0:-1], 4)
        for line in body.split("\n"):
            my_word_doc.add_paragraph(line)

    my_word_doc.save(保存路径)
实际上我是拿来转换PowerShell cmdlet命令的帮助txt文件为word文档的。其中带了翻译。简单记录下。
import docx #pip install python-docx | |
import re | |
import json | |
import requests | |
import time | |
import hashlib | |
from urllib.parse import urlencode | |
import random | |
import copy | |
# Module-level state, both in the same dictionary shape:
#   NAME / SYNOPSIS / DESCRIPTION -> str
#   PARAMETERS / Example          -> dict mapping a title to its text
# Filled in place by the help-text parser.
帮助文件解析结果 = {
    "NAME": "",
    "SYNOPSIS": "",
    "DESCRIPTION": "",
    "PARAMETERS": {},
    "Example": {},
}
# Read by word() when the document is written.
翻译结果 = {
    "NAME": "",
    "SYNOPSIS": "",
    "DESCRIPTION": "",
    "PARAMETERS": {},
    "Example": {},
}
def 解析PowerShell命令的帮助文本(文件路径:str):
    """Parse a PowerShell cmdlet help text file into ``帮助文件解析结果``.

    The file is read line by line. A line whose stripped text is ``NAME``,
    ``SYNOPSIS``, ``DESCRIPTION`` or ``PARAMETERS`` opens that section:

    * NAME: the next line, stripped, is the name.
    * SYNOPSIS / DESCRIPTION: every following line indented by at least
      four spaces is appended, left-stripped.
    * PARAMETERS: a line indented by four spaces is a parameter title and
      the following lines indented by eight spaces are its text. The
      section ends at the first example header.
    * A line matching ``^\\s*-*\\s*Example`` opens an example; everything up
      to the next such header, or the end of the file, is its text.

    Results accumulate in the module-level dictionary, so text is appended
    to whatever it already holds. Nothing is returned.

    Args:
        文件路径: Path of the UTF-8 encoded help text file.
    """
    section_indent = " " * 4
    detail_indent = " " * 8
    example_header = re.compile(r'^\s*-*\s*Example')
    example_title = re.compile(r"^\s*-*\s*([\w,\.: ]+) *-*")

    with open(文件路径, mode='r', encoding='utf-8') as fd_help:
        line = fd_help.readline()
        while line:
            section = line.strip()
            if section == "NAME":
                # Keep the raw line as the cursor so that a blank name line
                # does not look like end-of-file and stop the parse.
                line = fd_help.readline()
                帮助文件解析结果['NAME'] = line.strip()
            elif section == "SYNOPSIS":
                line = fd_help.readline()
                while line.startswith(section_indent):
                    帮助文件解析结果['SYNOPSIS'] += line.lstrip()
                    line = fd_help.readline()
            elif section == "DESCRIPTION":
                line = fd_help.readline()
                while line.startswith(section_indent):
                    帮助文件解析结果['DESCRIPTION'] += line.lstrip()
                    line = fd_help.readline()
            elif section == "PARAMETERS":
                line = fd_help.readline()
                while line.startswith(section_indent):
                    # A whitespace-only line yields the title "", which is
                    # removed again at the end of this function.
                    参数名 = line.lstrip()
                    帮助文件解析结果['PARAMETERS'][参数名] = ""
                    line = fd_help.readline()
                    while line.startswith(detail_indent):
                        帮助文件解析结果['PARAMETERS'][参数名] += line.lstrip()
                        line = fd_help.readline()
                    # Stop before an example header is taken for a title.
                    if example_header.match(line):
                        break
            elif example_header.match(line):
                # Once the examples start they run to the end of the file.
                while line:
                    # `line` is an example header here. The title keeps its
                    # trailing newline; word() drops it for the heading.
                    temp = example_title.sub(r"\1", line)
                    帮助文件解析结果["Example"][temp] = ""
                    line = fd_help.readline()
                    while line and not example_header.match(line):
                        帮助文件解析结果["Example"][temp] += line.lstrip()
                        line = fd_help.readline()
            else:
                line = fd_help.readline()

    # Remove entries whose key is the empty string.
    帮助文件解析结果.pop("", None)
    帮助文件解析结果["PARAMETERS"].pop("", None)
    帮助文件解析结果["Example"].pop("", None)
# This part sends a request to Youdao Translate to obtain the translation.
'''
i: 你好
from: AUTO
to: AUTO
smartresult: dict
client: fanyideskweb
salt: 16643765479061 // millisecond timestamp followed by one random digit 0-9; JS: r + parseInt(10 * Math.random(), 10), where r is the timestamp string
sign: 1d69ce8f7c6258243e573e31e29e0012 // signature; its computation is shown below
lts: 1664376547906 // millisecond timestamp
bv: 42c8b36dd7d61c619e7b1dc11e44d870 // constant for a given device: md5(User-Agent), e.g. md5("5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36 Edg/105.0.1343.53")
doctype: json
version: 2.1
keyfrom: fanyi.web
action: FY_BY_REALTlME
/*
The sign computation, recovered from the site's JavaScript:
define("newweb/common/service", ["./utils", "./md5", "./jquery-1.7"], function(e, t) {
    var n = e("./jquery-1.7");
    e("./utils");
    e("./md5");
    var r = function(e) {
        var t = n.md5(navigator.appVersion)
          , r = "" + (new Date).getTime()
          , i = r + parseInt(10 * Math.random(), 10);
        return {
            ts: r,
            bv: t,
            salt: i,
            sign: n.md5("fanyideskweb" + e + i + "Ygy_4c=r#e#4EX^NUGUc5") // here e is the text to translate and i is the millisecond timestamp followed by a random digit 0-9; the trailing string is probably generated on the server side and is likely to change, so check the JS code each time a request is captured
        }
    };
Overall, the form data is computed by the function "generateSaltSign".
*/
'''
def youdao_translate(The_translated_string:str):
    """Translate text through the Youdao web endpoint and return a string.

    The response groups its results by paragraph of the input. The pieces
    of each group are concatenated and every group is followed by a
    newline, so the returned text always ends with "\\n".

    Example (needs network access; the translations are those recorded in
    the original note)::

        youdao_translate("你好\\n世界\\n我来了\\n哈哈")
        # -> "hello\\nThe world\\nI'm coming\\nHa ha\\n"

    Args:
        The_translated_string: The text to translate.

    Returns:
        The translated text; ``""`` for empty input; or the string
        ``"errorCode = <code>"`` when the service reports a non-zero
        ``errorCode``.
    """
    if The_translated_string == "":
        # Stay with the str return type: callers split the result on "\n".
        return ""

    url = r'https://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule'
    User_Agent = "5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36 Edg/106.0.1370.37"
    header = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        # NOTE(review): "br" is advertised here; presumably this needs brotli
        # support in the HTTP stack to decode such a response - confirm.
        "Accept-Encoding": "gzip, deflate, br",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "Cookie": "OUTFOX_SEARCH_USER_ID=1135160796@10.108.162.134; OUTFOX_SEARCH_USER_ID_NCOO=775555146.507473; JSESSIONID=aaaQ2GYK5N-ozb24rKNcy; SESSION_FROM_COOKIE=unknown; DICT_UGC=be3af0da19b5c5e6aa4e17bd8d90b28a|; JSESSIONID=abcPzon0RcZqc7GltuAgy; ___rl__test__cookies=1665366515354",
        "Host": "fanyi.youdao.com",
        "Origin": "https://fanyi.youdao.com",
        "Referer": "https://fanyi.youdao.com/",
        # NOTE(review): the value ends with a doubled quote, reproduced as-is.
        "sec-ch-ua": '"Google Chrome";v="105", "Not)A;Brand";v="8", "Chromium";v="105""',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "Windows",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": User_Agent,
        "X-Requested-With": "XMLHttpRequest"
    }

    # lts: millisecond timestamp; salt: the same timestamp plus one random digit.
    timestamp = str(round(time.time() * 1000))
    salt = timestamp + str(random.randint(0, 9))
    # sign = md5("fanyideskweb" + text + salt + secret). The secret comes from
    # the site's JavaScript and may change; re-check the JS if requests fail.
    sign_str = "fanyideskweb" + The_translated_string + salt + "Ygy_4c=r#e#4EX^NUGUc5"
    sign = hashlib.md5(sign_str.encode()).hexdigest()
    # bv: md5 of the User-Agent string.
    bv = hashlib.md5(User_Agent.encode()).hexdigest()

    cookies = {
        "OUTFOX_SEARCH_USER_ID": "1135160796@10.108.162.134",
        "OUTFOX_SEARCH_USER_ID_NCOO": "775555146.507473",
        # The literal used to list JSESSIONID twice; this is the value that
        # won (the later one), kept at the position of the first entry.
        "JSESSIONID": "abcPzon0RcZqc7GltuAgy",
        "SESSION_FROM_COOKIE": "unknown",
        "DICT_UGC": "be3af0da19b5c5e6aa4e17bd8d90b28a|",
        "___rl__test__cookies": "1665366515354"
    }
    data = {
        "i": The_translated_string,
        # AUTO lets the service detect the direction; for a fixed
        # Chinese-to-English request use "zh-CHS" / "en" instead.
        "from": "AUTO",
        "to": "AUTO",
        "smartresult": "dict",
        "client": "fanyideskweb",
        "salt": salt,
        "sign": sign,
        "lts": timestamp,
        "bv": bv,
        "doctype": "json",
        "version": "2.1",
        "keyfrom": "fanyi.web",
        "action": "FY_BY_CLICKBUTTION"
    }
    data = urlencode(data)
    # A timeout keeps an unresponsive server from blocking the script forever.
    result = requests.post(url, data=data, cookies=cookies, headers=header, timeout=10)
    json_result = json.loads(result.text)

    if json_result["errorCode"]:
        return "errorCode = " + str(json_result["errorCode"])

    # One outer item per paragraph of the input; the service may split a
    # paragraph further, giving several inner items whose 'tgt' are joined.
    return "".join(
        "".join(piece['tgt'] for piece in paragraph) + "\n"
        for paragraph in json_result["translateResult"]
    )
def 执行翻译():
    """Return a translated deep copy of ``帮助文件解析结果``.

    SYNOPSIS, DESCRIPTION, every PARAMETERS value and every Example value
    are passed through ``youdao_translate``; Example titles are translated
    as well. NAME and the PARAMETERS titles are left untouched. Empty
    strings are never sent to the translator, and an Example whose title is
    empty is dropped.

    The module-level ``帮助文件解析结果`` and ``翻译结果`` are not modified;
    the caller decides what to do with the returned dictionary.

    Returns:
        A new dictionary with the same shape as ``帮助文件解析结果``.
    """
    translated = copy.deepcopy(帮助文件解析结果)

    def translate_if_present(text):
        # youdao_translate special-cases "", so skip the call for empty text
        # and keep every stored value a plain string.
        return youdao_translate(text) if text != "" else text

    for field in ("SYNOPSIS", "DESCRIPTION"):
        translated[field] = translate_if_present(translated[field])

    parameters = translated["PARAMETERS"]
    for name, description in parameters.items():
        # Only values of existing keys are replaced, which is safe while
        # iterating over the dictionary.
        parameters[name] = translate_if_present(description)

    translated["Example"] = {
        youdao_translate(title): translate_if_present(body)
        for title, body in translated["Example"].items()
        if title != ""
    }
    return translated
def 设置表格网格线为黑色实线(table_object:object):
    """Give a python-docx table a black, single-line, 0.5 pt grid.

    The outer edges and the inner horizontal/vertical rules are all set.
    Any ``w:tblBorders`` element already present on the table is replaced,
    so calling this function more than once does not stack duplicates.

    Args:
        table_object: A python-docx table (the object returned by
            ``Document.add_table``).

    Returns:
        The same ``table_object``; using the return value is optional.
    """
    qn = docx.oxml.ns.qn
    # w:sz is expressed in eighths of a point, so 4 means 0.5 pt.
    # w:color is a bare RRGGBB hex value; it must not carry a leading '#'.
    edge_attributes = {"sz": 4, "val": "single", "color": "000000"}
    table_properties = table_object._tbl.tblPr

    # Drop borders left by an earlier call; only one w:tblBorders is allowed.
    for stale in table_properties.findall(qn('w:tblBorders')):
        table_properties.remove(stale)

    borders = docx.oxml.OxmlElement('w:tblBorders')
    # Children are emitted in the order the WordprocessingML schema defines.
    for tag in ('top', 'left', 'bottom', 'right', 'insideH', 'insideV'):
        edge = docx.oxml.OxmlElement(f'w:{tag}')
        for key, value in edge_attributes.items():
            edge.set(qn(f'w:{key}'), str(value))
        borders.append(edge)

    # w:tblPr is an ordered sequence: w:tblBorders has to come before any of
    # these siblings, so insert in front of the first one that exists.
    for tag in ('w:shd', 'w:tblLayout', 'w:tblCellMar', 'w:tblLook',
                'w:tblCaption', 'w:tblDescription', 'w:tblPrChange'):
        successor = table_properties.find(qn(tag))
        if successor is not None:
            successor.addprevious(borders)
            break
    else:
        table_properties.append(borders)
    return table_object
def 设置标题样式为黑色宋体(heading_object:object):
    """Left-align a heading and render its runs in black 宋体 (SimSun).

    Args:
        heading_object: A python-docx paragraph, e.g. the value returned by
            ``Document.add_heading``.

    Returns:
        The same ``heading_object``; using the return value is optional.
    """
    heading_object.alignment = docx.enum.text.WD_ALIGN_PARAGRAPH.LEFT
    east_asia_attribute = docx.oxml.ns.qn('w:eastAsia')
    black = docx.shared.RGBColor(0, 0, 0)
    for run in heading_object.runs:
        run.font.name = u'宋体'
        # font.name alone does not change the face used for CJK characters;
        # the East Asian face must be set on w:rFonts as well. The assignment
        # above guarantees that rPr/rFonts already exist on the run.
        run._element.rPr.rFonts.set(east_asia_attribute, u'宋体')
        run.font.color.rgb = black
    return heading_object
def word(保存路径=r"C:\Users\xxx\Downloads\temp.docx"):
    """Write the module-level ``翻译结果`` dictionary to a .docx file.

    Layout: the cmdlet name as a level-2 heading, then "描述" (synopsis and
    description, one paragraph per line), "参数" (a two-column table of
    parameter name / parameter text) and "示例" (one level-4 heading per
    example followed by its lines as paragraphs).

    Args:
        保存路径: Where the document is saved. Defaults to the path that was
            previously hard-coded.
    """
    left = docx.enum.text.WD_PARAGRAPH_ALIGNMENT.LEFT
    my_word_doc = docx.Document()  # start from an empty document

    def add_styled_heading(text, level):
        # Every heading gets the same black 宋体, left-aligned styling.
        设置标题样式为黑色宋体(my_word_doc.add_heading(text, level=level))

    add_styled_heading(翻译结果["NAME"], 2)

    add_styled_heading("描述", 3)
    for section in ("SYNOPSIS", "DESCRIPTION"):
        for line in 翻译结果[section].split("\n"):
            my_word_doc.add_paragraph(line)

    add_styled_heading("参数", 3)
    parameters = 翻译结果["PARAMETERS"]
    table = my_word_doc.add_table(rows=len(parameters), cols=2)
    设置表格网格线为黑色实线(table)
    for row_index, (name, description) in enumerate(parameters.items()):
        for column, text in enumerate((name, description)):
            # Look each cell up once and assign its text once. As before,
            # the lines of a value are concatenated without any separator.
            cell = table.cell(row_index, column)
            cell.text = text.replace("\n", "")
            for paragraph in cell.paragraphs:
                paragraph.alignment = left

    add_styled_heading("示例", 3)
    for title, body in 翻译结果["Example"].items():
        # Example titles carry a trailing newline; drop that last character.
        add_styled_heading(title[0:-1], 4)
        for line in body.split("\n"):
            my_word_doc.add_paragraph(line)

    my_word_doc.save(保存路径)
# Parse the saved help text of the cmdlet into 帮助文件解析结果.
解析PowerShell命令的帮助文本(r"C:\Users\xxx\Downloads\Get-CimClass.txt")
# Parsing the results of the new Youdao Translate version proved too hard, so
# translation is switched off here. If it really cannot be made to work, get
# the formatting right and run the document through a document translator.
#翻译结果 = 执行翻译()
翻译结果=帮助文件解析结果
word()