Python RE 正则表达式模块

通用字符匹配

语法	通配符匹配作用解析
.	匹配除换行符之外的任意一个字符
*	匹配前一个字符出现零次或任意多次
+	匹配前一个字符出现1次或任意多次
?	匹配前一个字符出现1次或0次
^	匹配以指定字符开头的数据
$	匹配以指定字符结尾的数据
{m}	匹配前一个字符出现过m次的记录
{n,m}	匹配前一个字符,最少出现n次,最多出现m次

匹配任意一个字符(.) 默认匹配除\n之外的任意一个字符,若指定flags=re.DOTALL则匹配包括换行符在内的任意字符.

 
>>> re.search("hel.o","hello lyshark,hello world").group()
'hello'
 
>>> re.findall("hel.o","hello lyshark hello world")
['hello', 'hello']

匹配前一个字符出现0至任意多次(*) 匹配星号前面的字符出现0次,或任意多次.

 
>>> re.findall("ab*","abccba23acbcabb")
['ab', 'a', 'a', 'abb']

匹配前一个字符出现1次或任意多次(+) 匹配加号前面的字符出现过1次,或任意多次,至少出现一次.

 
>>> re.findall("ab+","abccba23acbcabb")
['ab', 'abb']

匹配前一个字符出现1次或0次(?) 匹配前一个字符出现过1次或0次,允许出现0次.

 
>>> re.findall("ab?","ab,abc,abb,abcd,a,acd,abc")
['ab', 'ab', 'ab', 'ab', 'a', 'a', 'ab']
 
>>> re.findall("ab?","ab,a,abc,abcde")
['ab', 'a', 'ab', 'ab']

匹配开头与结尾(^ $): ^匹配以指定字符开头的数据,$匹配以指定字符结尾的数据.

 
>>> re.search(r"^h","hello world").group()
'h'
>>> re.search(r"world$","hello\nworld").group()
'world'
>>> re.search(r"^a","\nabc\ndef",flags=re.MULTILINE).group()
'a'
>>> re.search("foo$","bfoo\nsdfsf",flags=re.MULTILINE).group()
'foo'

匹配前一个字符出现次数(x{m}) 匹配前一个字符x,出现过m次的行.

 
>>> re.search("hello{2}","hello,helloo,hellooo,helloooo").group()
'helloo'
 
>>> re.search("hello{3}","hello,helloo,hellooo,helloooo").group()
'hellooo'

匹配前一个字符出现次数(x{n,m}) 匹配前一个字符x,最少出现过n次,最多出现过m次.

 
>>> re.search("hello{1,2}","hello,helloo,hellooo,helloooo").group()
'hello'
 
>>> re.findall("hello{1,2}","hello,helloo,hellooo,helloooo")
['hello', 'helloo', 'helloo', 'helloo']

脱意字符的匹配(\) 转义字符,通常情况下使后一个字符改变原来的意思,也叫做脱意字符.

 
>>> re.search("..\\t","hello\t lyshark\n").group()
'lo\t'
>>> re.search("\\t","hello\t lyshark\n").group()
'\t'
>>> re.search("\t","hello\t lyshark\n").group()
'\t'
>>> re.search(r"\\","hello\\lyshark").group()
'\\'

匹配查找范围([]) 匹配查找指定的数据范围,通常使用[0-9] [a-z] [A-Z]这几个匹配格式.

 
>>> re.search("[0-9]","hello 1,2,3,4,5").group()   # 匹配第一次出现数字的行
'1'
>>> re.search("[0-9]","hello a12 b23 34a 45t").group()
'1'
>>> re.findall("[0-9]","hello 1,2,3,4,5")          # 匹配所有出现数字的行
['1', '2', '3', '4', '5']
>>> re.findall("[0-9]","hello  b23 34a 45t wan")
['2', '3', '3', '4', '4', '5']
>>> re.search("[^0-9]","hello 1,2,3,4,5").group()  # 匹配开头不是0-9的单个字符
'h'
>>> re.search("[^0-9]*","hello 1,2,3,4,5").group() # 匹配开头连续的非数字字符(包含末尾空格)
'hello '
>>> re.search(r"[aeiou]","Hello LyShark").group()
'e'

匹配查找空白字符(\s) 匹配空格、制表符、换行符等空白字符.

 
>>> re.search("\s+","ab\tc1\n3").group()
'\t'
>>> re.search("\s+","ab c1\n3").group()
' '

选择性匹配(|) 匹配选择竖线左边,或者右边的任意一种情况.

 
>>> re.search("abc|ABC","ABCBabcCD").group()
'ABC'
 
>>> re.findall("abc|ABC","ABCBabcCD")
['ABC', 'abc']

实现分组匹配((?P<name>...)) 匹配条件并自动分组,其中?P<name>是固定写法(name为分组名),后面紧跟正则规则.

 
>>> number = "371481199306143242"
>>> re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})",number).groupdict()
{'province': '3714', 'city': '81', 'birthday': '1993'}
 
>>> re.search("(?P<name>[a-zA-Z]+)(?P<age>[0-9]+)","lyshark22").groupdict("temp")
{'name': 'lyshark', 'age': '22'}

针对IP地址与MAC地址的提取:

 
>>> re.search("^(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}$","192.168.1.1")
<re.Match object; span=(0, 11), match='192.168.1.1'>       # 匹配IP地址
 
>>> re.match(r"^\s*\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s*$","192.168.1.100")
<re.Match object; span=(0, 13), match='192.168.1.100'>     # 匹配IP地址
 
>>> string_ip = "is this 236.168.192.1 ip 12321"
>>> result = re.findall(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b", string_ip)
>>> result
['236.168.192.1']
 
>>> string=re.compile(r'((1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.){3}(1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)')
>>> print(string.search('245.255.256.25asdsa10.11.244.10').group())
10.11.244.10
 
>>> string_IPv6="1050:0:0:0:5:600:300c:326b"               # 匹配IPV6地址(大小写不敏感)
>>> re.match(r"^(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}$", string_IPv6, re.I)
<re.Match object; span=(0, 26), match='1050:0:0:0:5:600:300c:326b'>
 
>>> re.findall(r"(?<![:.\w])(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}(?![:.\w])", string_IPv6, re.I)
['1050:0:0:0:5:600:300c:326b']
 
>>> re.match(r"^\s*([0-9a-fA-F]{2,2}:){5,5}[0-9a-fA-F]{2,2}\s*$","AB:1F:44:5B:3B:4A")
<re.Match object; span=(0, 17), match='AB:1F:44:5B:3B:4A'> # 匹配一个MAC地址

针对网址与端口的匹配:

 
>>> re.search(r"^(http|https?:\/\/)([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$","https://www.baidu.com")
<re.Match object; span=(0, 21), match='https://www.baidu.com'> # 匹配网址
 
>>> re.findall(r"([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5])","hello 443")
['4', '4', '3']                                            # 匹配端口号
 
>>> re.search(r'^(http|https?:\/\/)([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?(:([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5]))?$',"http://www.baidu.com:80")
<re.Match object; span=(0, 23), match='http://www.baidu.com:80'>
 
>>> re.search(r'^(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])(:([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5]))?$',"192.168.1.100:443")
<re.Match object; span=(0, 17), match='192.168.1.100:443'>

针对时间格式的匹配:

 
>>> re.search(r"(\d{4}-\d{1,2}-\d{1,2})","2019-01-12")
<re.Match object; span=(0, 10), match='2019-01-12'>
 
>>> re.findall(r"(\d{4}-\d{1,2}-\d{1,2})","2019-01-12,2010-12-11")
['2019-01-12', '2010-12-11']
 
>>> re.findall(r"\d{4}[-/]\d{2}[-/]\d{2}","2019-01-12,2010/12/11")
['2019-01-12', '2010/12/11']
 
>>> re.search(r"(\d{1,2}/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/\d{4})","2019-01-12,21/Nov/2019").group()
'21/Nov/2019'
 
>>> re.findall(r"(\d{1,2}:\d{1,2})","2010-12-11 12:11")
['12:11']
 
>>> re.findall(r"(\d{1,2}:\d{1,2}:\d{1,2})","2010-12-11 12:11:22,09:25:30")
['12:11:22', '09:25:30']
 
>>> re.search(r"(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})","2010-12-11 12:11")
<re.Match object; span=(0, 16), match='2010-12-11 12:11'>
 
>>> re.findall(r"(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})","2010-12-11 12:11")
['2010-12-11 12:11']

匹配邮箱/手机号/身份证:

 
>>> re.search("^1[3458]\d{9}$","18264856987")
<re.Match object; span=(0, 11), match='18264856987'>       # 匹配手机号
 
>>> re.search("[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+","182648@qq.com")
<re.Match object; span=(0, 13), match='182648@qq.com'>     # 匹配一个邮箱
 
>>> re.findall(r'(^[1-8][0-7]{2}\d{3}([12]\d{3})(0[1-9]|1[012])(0[1-9]|[12]\d|3[01])\d{3}([0-9X])$)',"33070219630306041X")
[('33070219630306041X', '1963', '03', '06', 'X')]          # 匹配身份证号

针对密码验证的匹配: 此处的匹配正则常用于用户名密码的过滤.

 
>>> re.findall("[\u4e00-\u9fa5]","你好")
['你', '好']       # 匹配中文字符
 
>>> re.findall("^[\u4e00-\u9fa5_a-zA-Z0-9]{4,10}$","1233")
['1233']           # 单纯限制字符的输入长度
 
>>> re.findall(r"^[a-zA-Z][a-zA-Z0-9_]{4,15}$","password")
['password']      # 允许输入5-16个字符的密码(首位必须是字母),允许使用下划线.
 
>>> re.findall(r"^[a-zA-Z]\w{5,17}$","passw3")
['passw3']        # 以字母开头,长度在6~18之间,只能包含字母、数字和下划线
 
>>> re.findall("^(?!_)(?!.*?_$)[a-zA-Z0-9_\u4e00-\u9fa5]+$","1233")
['1233']          # 限制不能以下划线开头和结尾

常用匹配函数

函数与方法名	通配符匹配作用解析
regex.match	从字符串开头位置匹配查找,如果0个或多个字符被匹配则返回相应的匹配对象,如果不匹配则返回None.
regex.search	扫描整个字符串,查找正则匹配到的字串中第一次出现的位置,并返回相应的匹配对象,如果匹配失败则返回None.
regex.findall	搜索字符串中与正则表达式匹配的所有子串,也就是查找字符串中所有的匹配结果,并且以列表的形式返回数据.
regex.sub	字符串的替换,简单来说就是替换字符串中与正则表达式匹配的指定数量的子串,最后返回替换修改后的字符串.
regex.split	以正则表达式匹配的字符串作为分隔符,对一个字符串进行分割,以列表形式返回分割后的各个字符串.
match.expand	通过得到的匹配对象来构造并返回一个新的字符串,未被匹配到的分组将被替换为一个空字符串.
match.group	返回一个或多个指定捕获组所匹配到的内容,如果只有1个参数则返回单独的字符串,多参数返回元组.
match.groups	返回一个包含所有分组所匹配内容的元组,如果某个分组没有匹配到内容,则取default所指定的值.
match.groupdict	返回一个包含所有命名分组名称及其所匹配内容的字典对象,如果某个分组没有匹配到内容则取默认值.

regex.match() 从起始位置开始匹配,匹配成功返回一个对象,未匹配成功返回None.

 
match(pattern,string,flags=0)
# pattern： 正则模型
# string ： 要匹配的字符串
# flags  ： 匹配模式
#------------------------------------------------
#  未分组情况下.
>>> origin = "hello alex bcd abcd lge acd 19"
>>>
>>> ret = re.match("h\w+",origin)
>>> print(ret.group())                 #获取匹配到的所有结果
>>> print(ret.groups())                #获取模型中匹配到的分组结果
>>> print(ret.groupdict())             #获取模型中匹配到的分组结果
 
#  有分组情况下. 提取匹配成功的指定内容(先匹配成功全部正则,再匹配成功的局部内容提取出来)
>>> ret = re.match("h(\w+).*(?P<name>\d)$",origin)
>>> print(ret.group())                 #获取匹配到的所有结果
>>> print(ret.groups())                #获取模型中匹配到的分组结果
>>> print(ret.groupdict())             #获取模型中匹配到的分组中所有指定了key的组

regex.search() 搜索整个字符串去匹配第一个符合条件的数据,未匹配成功返回None.

 
>>> origin = "hello alex bcd abcd lge acd 19"
>>>
>>> re.search("^h\w+",origin).group()          #匹配开头是h的后面是任意字符的
'hello'
>>> re.search("a\w+",origin).group()           #匹配a开头后面是任意字符的
'alex'
>>> re.search("(?P<name>a\w+)",origin).groupdict()
{'name': 'alex'}                               #分组匹配并过滤出alex
 
>>> re.search("(?P<姓名>[a-zA-Z]+)(?P<年龄>[0-9]+)","lyshark22").groupdict()
{'姓名': 'lyshark', '年龄': '22'}               #匹配字符串,并分组打印出结果

regex.findall() 获取非重叠的匹配列表,且每一个匹配均是字符串,空的匹配也会包含在结果中.

 
>>> origin = "hello alex bcd abcd lge acd 19"
 
>>> re.findall("al\w+",origin)
['alex']                             #匹配到单个结果,则以单列表返回
>>> re.findall("a\w+",origin)
['alex', 'abcd', 'acd']              #匹配到多个结果,则以列表形式返回

regex.sub() 先匹配查找结果,然后进行字串的替换,也就是替换匹配成功的指定位置字符串.

 
sub(pattern,repl,string,count=0,flags=0)
# pattern： 正则模型
# repl   ： 要替换的字符串或可执行对象
# string ： 要匹配的字符串
# count  ： 指定匹配个数
# flags  ： 匹配模式
 
>>> origin = "hello alex bcd abcd lge acd 19"
 
>>> re.sub("a[a-z]+","999999",origin,1)      #匹配以a开头的字串,并替换成999999,替换1次
'hello 999999 bcd abcd lge acd 19'
>>> re.sub("a[a-z]+","999999",origin,2)      #匹配以a开头的字串,并替换成999999,替换2次
'hello 999999 bcd 999999 lge acd 19'

regex.split() 字符串切割函数,用来实现对指定字符串的分割工作,根据正则匹配分割字符串.

 
split(pattern,string,maxsplit=0,flags=0)
# pattern： 正则模型
# string ： 要匹配的字符串
# maxsplit：指定分割个数
# flags  ： 匹配模式
 
>>> origin = "hello alex bcd abcd lge acd 19"
 
>>> re.split("alex",origin,1)               #无分组切割
['hello ', ' bcd abcd lge acd 19']
>>> re.split("(alex)",origin,1)             #有分组,以alex作为分隔符,切割字符串
['hello ', 'alex', ' bcd abcd lge acd 19']

单独匹配

 
>>> ptr = re.compile(r"[A-Z]")
>>> ptr.search("Hello lyshark")
<re.Match object; span=(0, 1), match='H'>
>>> ptr.findall("Hello lyshark")
['H']

re.DOTALL

 
# 正则表达式默认以单行开始匹配的
import re
 
def re_pattern_syntax():
    """Show how ``re.DOTALL`` changes what the ``.`` metacharacter matches.

    Prints the match of ``.*`` against a two-line string in the default mode,
    a separator row of 80 asterisks, and then the match of the same pattern
    with ``re.DOTALL`` enabled.
    """
    sample = 'abc\nedf'
    # Default mode: "." stops at the newline, so only the first line matches.
    first_line_only = re.match(r'.*', sample)
    print(first_line_only.group())
    print('*' * 80)
    # DOTALL: "." also consumes the newline, so the whole string matches.
    whole_text = re.match(r'.*', sample, re.DOTALL)
    print(whole_text.group())
 
# Run the DOTALL demonstration only when this snippet is executed as a script.
if __name__ == "__main__":
    re_pattern_syntax()

re.MULTILINE

 
# 正则表达式默认以单行开始匹配的
import re
 
def re_pattern_syntax1():
    """Contrast the ``^`` anchor with and without ``re.MULTILINE``.

    Prints the ``findall`` result of ``^abc`` on a two-line string in the
    default mode, a separator row of 80 asterisks, and then the result on a
    second two-line string with ``re.MULTILINE`` enabled.
    """
    anchored = r'^abc'
    # Default mode: "^" only anchors at the very beginning of the string.
    default_hits = re.findall(anchored, 'abc\nedf')
    print(default_hits)
    print('*' * 80)
    # MULTILINE: "^" additionally anchors right after each newline.
    per_line_hits = re.findall(anchored, 'abc\nabc', re.MULTILINE)
    print(per_line_hits)
 
def re_pattern_syntax2():
    """Contrast the ``$`` anchor with and without ``re.MULTILINE``.

    Prints the ``findall`` result of the pattern on a two-line string in the
    default mode, a separator row of 80 asterisks, and then the result on the
    same string with ``re.MULTILINE`` enabled.
    """
    pattern = r'abc\d$'
    text = 'abc1\nabc2'
    # Default mode: "$" anchors at the end of the string.
    end_of_string_hits = re.findall(pattern, text)
    print(end_of_string_hits)
    print('*' * 80)
    # MULTILINE: "$" also anchors at the end of every line.
    end_of_line_hits = re.findall(pattern, text, re.MULTILINE)
    print(end_of_line_hits)
 
# Run both MULTILINE demonstrations only when executed as a script.
if __name__ == "__main__":
    re_pattern_syntax1()
    re_pattern_syntax2()

?非贪婪模式

 
import re
 
def re_pattern_syntax4():
    """Demonstrate greedy versus non-greedy quantifiers on a small tag string.

    Prints four lines: the greedy whole match, the non-greedy whole match,
    the greedy capture group, and the non-greedy capture group.
    """
    markup = '<H1>title</H1>'
    # (pattern, group index) pairs; index 0 is the entire match.
    # A greedy ".+" takes as much as possible, ".+?" as little as possible.
    cases = (
        (r'<.+>', 0),
        (r'<.+?>', 0),
        (r'<(.+)>', 1),
        (r'<(.+?)>', 1),
    )
    for pattern, group_index in cases:
        print(re.match(pattern, markup).group(group_index))
 
def re_pattern_syntax5():
    """Demonstrate greedy versus non-greedy bounded repetition.

    Prints the match of the greedy range quantifier, the match of its
    non-greedy variant, and finally a separator row of 80 asterisks.
    """
    subject = 'abbbbbbb'
    # Greedy: takes the upper bound of the range when available.
    greedy = re.match(r'ab{2,4}', subject)
    print(greedy.group())
    # Non-greedy: settles for the lower bound of the range.
    lazy = re.match(r'ab{2,4}?', subject)
    print(lazy.group())
    print('*' * 80)
 
# Run both greedy/non-greedy demonstrations only when executed as a script.
if __name__ == "__main__":
    re_pattern_syntax4()
    re_pattern_syntax5()

re.I/re.IGNORECASE

 
import re
 
def re_pattern_flags():
    """Demonstrate case-insensitive matching with ``re.IGNORECASE``.

    Prints the tuple of captured groups for a "key : value" style string
    whose key differs in case from the pattern, followed by a separator row
    of 80 asterisks.
    """
    # re.I is the short alias of re.IGNORECASE.
    found = re.match(r'(Name)\s*:\s*(\w+)', 'NAME : Joey', re.IGNORECASE)
    captured = found.groups()
    print(captured)
    print('*' * 80)
 
# Script entry point. The function defined in this snippet is
# re_pattern_flags; the previously called name was never defined and raised
# NameError when the snippet was run.
if __name__ == '__main__':
    re_pattern_flags()

re.VERBOSE

 
import re
 
def re_pattern_flags1():
    """Demonstrate ``re.VERBOSE`` with a commented, multi-line pattern.

    Compiles a pattern whose inline comments label the integer part, the
    optional decimal point and the optional fractional digits, then prints
    the first match twice (once through ``re.search`` with the compiled
    pattern, once through the pattern's own ``search`` method), followed by
    a separator row of 80 asterisks.
    """
    sentence = 'the number is 20.5'
    # With VERBOSE, whitespace and "#" comments inside the pattern are ignored.
    number_re = re.compile(r'''
                    \d+   # 整数部分
                    \.?   # 小数点，可能包含也可能不包含
                    \d*   # 小数部分,可选
                    ''', re.VERBOSE)
    # Module-level search accepts an already compiled pattern object.
    via_module = re.search(number_re, sentence)
    print(via_module.group())
    via_method = number_re.search(sentence)
    print(via_method.group())
    print('*' * 80)
 
# Script entry point. The function defined in this snippet is
# re_pattern_flags1; the previously called name was never defined and raised
# NameError when the snippet was run.
if __name__ == '__main__':
    re_pattern_flags1()

	>>> re.search("hel.o","hello lyshark,hello world").group()
	'hello'

	>>> re.findall("hel.o","hello lyshark hello world")
	['hello', 'hello']

	>>> re.findall("ab*","abccba23acbcabb")
	['ab', 'a', 'a', 'abb']

	>>> re.findall("ab?","ab,abc,abb,abcd,a,acd,abc")
	['ab', 'ab', 'ab', 'ab', 'a', 'a', 'ab']

	>>> re.findall("ab?","ab,a,abc,abcde")
	['ab', 'a', 'ab', 'ab']

	>>> re.search(r"^h","hello world").group()
	'h'
	>>> re.search(r"world$","hello\nworld").group()
	'world'
	>>> re.search(r"^a","\nabc\ndef",flags=re.MULTILINE).group()
	'a'
	>>> re.search("foo$","bfoo\nsdfsf",flags=re.MULTILINE).group()
	'foo'

	>>> re.search("hello{2}","hello,helloo,hellooo,helloooo").group()
	'helloo'

	>>> re.search("hello{3}","hello,helloo,hellooo,helloooo").group()
	'hellooo'

	>>> re.search("hello{1,2}","hello,helloo,hellooo,helloooo").group()
	'hello'

	>>> re.findall("hello{1,2}","hello,helloo,hellooo,helloooo")
	['hello', 'helloo', 'helloo', 'helloo']

	>>> re.search("..\\t","hello\t lyshark\n").group()
	'lo\t'
	>>> re.search("\\t","hello\t lyshark\n").group()
	'\t'
	>>> re.search("\t","hello\t lyshark\n").group()
	'\t'
	>>> re.search(r"\\","hello\\lyshark").group()
	'\\'

	>>> re.search("[0-9]","hello 1,2,3,4,5").group() # 匹配第一次出现数字的行
	'1'
	>>> re.search("[0-9]","hello a12 b23 34a 45t").group()
	'1'
	>>> re.findall("[0-9]","hello 1,2,3,4,5") # 匹配所有出现数字的行
	['1', '2', '3', '4', '5']
	>>> re.findall("[0-9]","hello b23 34a 45t wan")
	['2', '3', '3', '4', '4', '5']
	>>> re.search("[^0-9]","hello 1,2,3,4,5").group() # 匹配开头不是0-9的单个字符
	'h'
	>>> re.search("[^0-9]*","hello 1,2,3,4,5").group() # 匹配开头连续的非数字字符(包含末尾空格)
	'hello '
	>>> re.search(r"[aeiou]","Hello LyShark").group()
	'e'

	>>> re.search("\s+","ab\tc1\n3").group()
	'\t'
	>>> re.search("\s+","ab c1\n3").group()
	' '

	>>> re.search("abc|ABC","ABCBabcCD").group()
	'ABC'

	>>> re.findall("abc|ABC","ABCBabcCD")
	['ABC', 'abc']

	>>> number = "371481199306143242"
	>>> re.search("(?P<province>[0-9]{4})(?P<city>[0-9]{2})(?P<birthday>[0-9]{4})",number).groupdict()
	{'province': '3714', 'city': '81', 'birthday': '1993'}

	>>> re.search("(?P<name>[a-zA-Z]+)(?P<age>[0-9]+)","lyshark22").groupdict("temp")
	{'name': 'lyshark', 'age': '22'}

	>>> re.search("^(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}$","192.168.1.1")
	<re.Match object; span=(0, 11), match='192.168.1.1'> # 匹配IP地址

	>>> re.match(r"^\s*\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\s*$","192.168.1.100")
	<re.Match object; span=(0, 13), match='192.168.1.100'> # 匹配IP地址

	>>> string_ip = "is this 236.168.192.1 ip 12321"
	>>> result = re.findall(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b", string_ip)
	>>> result
	['236.168.192.1']

	>>> string=re.compile(r'((1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)\.){3}(1\d\d|2[0-4]\d|25[0-5]|[1-9]\d|\d)')
	>>> print(string.search('245.255.256.25asdsa10.11.244.10').group())
	10.11.244.10

	>>> string_IPv6="1050:0:0:0:5:600:300c:326b" # 匹配IPV6地址(大小写不敏感)
	>>> re.match(r"^(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}$", string_IPv6, re.I)
	<re.Match object; span=(0, 26), match='1050:0:0:0:5:600:300c:326b'>

	>>> re.findall(r"(?<![:.\w])(?:[A-F0-9]{1,4}:){7}[A-F0-9]{1,4}(?![:.\w])", string_IPv6, re.I)
	['1050:0:0:0:5:600:300c:326b']

	>>> re.match(r"^\s*([0-9a-fA-F]{2,2}:){5,5}[0-9a-fA-F]{2,2}\s*$","AB:1F:44:5B:3B:4A")
	<re.Match object; span=(0, 17), match='AB:1F:44:5B:3B:4A'> # 匹配一个MAC地址

	>>> re.search(r"^(http|https?:\/\/)([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?$","https://www.baidu.com")
	<re.Match object; span=(0, 21), match='https://www.baidu.com'> # 匹配网址

	>>> re.findall(r"([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5])","hello 443")
	['4', '4', '3'] # 匹配端口号

	>>> re.search(r'^(http|https?:\/\/)([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?(:([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5]))?$',"http://www.baidu.com:80")
	<re.Match object; span=(0, 23), match='http://www.baidu.com:80'>

	>>> re.search(r'^(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])\.(\d|[1-9]\d|1\d{2}|2[0-4]\d|25[0-5])(:([0-9]|[1-9]\d{1,3}|[1-5]\d{4}|6[0-4]\d{3}|65[0-4]\d{2}|655[0-2]\d|6553[0-5]))?$',"192.168.1.100:443")
	<re.Match object; span=(0, 17), match='192.168.1.100:443'>

	>>> re.search(r"(\d{4}-\d{1,2}-\d{1,2})","2019-01-12")
	<re.Match object; span=(0, 10), match='2019-01-12'>

	>>> re.findall(r"(\d{4}-\d{1,2}-\d{1,2})","2019-01-12,2010-12-11")
	['2019-01-12', '2010-12-11']

	>>> re.findall(r"\d{4}[-/]\d{2}[-/]\d{2}","2019-01-12,2010/12/11")
	['2019-01-12', '2010/12/11']

	>>> re.search(r"(\d{1,2}/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/\d{4})","2019-01-12,21/Nov/2019").group()
	'21/Nov/2019'

	>>> re.findall(r"(\d{1,2}:\d{1,2})","2010-12-11 12:11")
	['12:11']

	>>> re.findall(r"(\d{1,2}:\d{1,2}:\d{1,2})","2010-12-11 12:11:22,09:25:30")
	['12:11:22', '09:25:30']

	>>> re.search(r"(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})","2010-12-11 12:11")
	<re.Match object; span=(0, 16), match='2010-12-11 12:11'>

	>>> re.findall(r"(\d{4}-\d{1,2}-\d{1,2}\s\d{1,2}:\d{1,2})","2010-12-11 12:11")
	['2010-12-11 12:11']

	>>> re.search("^1[3458]\d{9}$","18264856987")
	<re.Match object; span=(0, 11), match='18264856987'> # 匹配手机号

	>>> re.search("[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\.[a-zA-Z0-9_-]+)+","182648@qq.com")
	<re.Match object; span=(0, 13), match='182648@qq.com'> # 匹配一个邮箱

	>>> re.findall(r'(^[1-8][0-7]{2}\d{3}([12]\d{3})(0[1-9]|1[012])(0[1-9]|[12]\d|3[01])\d{3}([0-9X])$)',"33070219630306041X")
	[('33070219630306041X', '1963', '03', '06', 'X')] # 匹配身份证号

	>>> re.findall("[\u4e00-\u9fa5]","你好")
	['你', '好'] # 匹配中文字符

	>>> re.findall("^[\u4e00-\u9fa5_a-zA-Z0-9]{4,10}$","1233")
	['1233'] # 单纯限制字符的输入长度

	>>> re.findall(r"^[a-zA-Z][a-zA-Z0-9_]{4,15}$","password")
	['password'] # 允许输入5-16个字符的密码(首位必须是字母),允许使用下划线.

	>>> re.findall(r"^[a-zA-Z]\w{5,17}$","passw3")
	['passw3'] # 以字母开头,长度在6~18之间,只能包含字母、数字和下划线

	>>> re.findall("^(?!_)(?!.*?_$)[a-zA-Z0-9_\u4e00-\u9fa5]+$","1233")
	['1233'] # 限制不能以下划线开头和结尾

	match(pattern,string,flags=0)
	# pattern：正则模型
	# string ：要匹配的字符串
	# flags ：匹配模式
	#------------------------------------------------
	# 未分组情况下.
	>>> origin = "hello alex bcd abcd lge acd 19"
	>>>
	>>> ret = re.match("h\w+",origin)
	>>> print(ret.group()) #获取匹配到的所有结果
	>>> print(ret.groups()) #获取模型中匹配到的分组结果
	>>> print(ret.groupdict()) #获取模型中匹配到的分组结果

	# 有分组情况下. 提取匹配成功的指定内容(先匹配成功全部正则,再匹配成功的局部内容提取出来)
	>>> ret = re.match("h(\w+).*(?P<name>\d)$",origin)
	>>> print(ret.group()) #获取匹配到的所有结果
	>>> print(ret.groups()) #获取模型中匹配到的分组结果
	>>> print(ret.groupdict()) #获取模型中匹配到的分组中所有指定了key的组

	>>> origin = "hello alex bcd abcd lge acd 19"
	>>>
	>>> re.search("^h\w+",origin).group() #匹配开头是h的后面是任意字符的
	'hello'
	>>> re.search("a\w+",origin).group() #匹配a开头后面是任意字符的
	'alex'
	>>> re.search("(?P<name>a\w+)",origin).groupdict()
	{'name': 'alex'} #分组匹配并过滤出alex

	>>> re.search("(?P<姓名>[a-zA-Z]+)(?P<年龄>[0-9]+)","lyshark22").groupdict()
	{'姓名': 'lyshark', '年龄': '22'} #匹配字符串,并分组打印出结果

	>>> origin = "hello alex bcd abcd lge acd 19"

	>>> re.findall("al\w+",origin)
	['alex'] #匹配到单个结果,则以单列表返回
	>>> re.findall("a\w+",origin)
	['alex', 'abcd', 'acd'] #匹配到多个结果,则以列表形式返回

	sub(pattern,repl,string,count=0,flags=0)
	# pattern：正则模型
	# repl ：要替换的字符串或可执行对象
	# string ：要匹配的字符串
	# count ：指定匹配个数
	# flags ：匹配模式

	>>> origin = "hello alex bcd abcd lge acd 19"

	>>> re.sub("a[a-z]+","999999",origin,1) #匹配以a开头的字串,并替换成999999,替换1次
	'hello 999999 bcd abcd lge acd 19'
	>>> re.sub("a[a-z]+","999999",origin,2) #匹配以a开头的字串,并替换成999999,替换2次
	'hello 999999 bcd 999999 lge acd 19'

	split(pattern,string,maxsplit=0,flags=0)
	# pattern：正则模型
	# string ：要匹配的字符串
	# maxsplit：指定分割个数
	# flags ：匹配模式

	>>> origin = "hello alex bcd abcd lge acd 19"

	>>> re.split("alex",origin,1) #无分组切割
	['hello ', ' bcd abcd lge acd 19']
	>>> re.split("(alex)",origin,1) #有分组,以alex作为分隔符,切割字符串
	['hello ', 'alex', ' bcd abcd lge acd 19']

	>>> ptr = re.compile(r"[A-Z]")
	>>> ptr.search("Hello lyshark")
	<re.Match object; span=(0, 1), match='H'>
	>>> ptr.findall("Hello lyshark")
	['H']

	# 正则表达式默认以单行开始匹配的
	import re

	def re_pattern_syntax():
		"""Show how ``re.DOTALL`` changes what the ``.`` metacharacter matches.

		Prints the match of ``.*`` against a two-line string in the default
		mode, a separator row of 80 asterisks, and then the match of the same
		pattern with ``re.DOTALL`` enabled.
		"""
		# "." matches any single character except a newline by default;
		# "*" repeats the preceding item zero or more times.
		print(re.match(r'.*', 'abc\nedf').group())
		print('*' * 80)
		# With re.DOTALL, "." also matches the newline character.
		print(re.match(r'.*', 'abc\nedf', re.DOTALL).group())

	# Script entry point; the guarded call must be indented under the "if".
	if __name__ == '__main__':
		re_pattern_syntax()

	# 正则表达式默认以单行开始匹配的
	import re

	def re_pattern_syntax1():
		"""Contrast the ``^`` anchor with and without ``re.MULTILINE``.

		Prints the ``findall`` result of ``^abc`` on a two-line string in the
		default mode, a separator row of 80 asterisks, and then the result on
		a second two-line string with ``re.MULTILINE`` enabled.
		"""
		# Default mode: "^" only anchors at the beginning of the string.
		print(re.findall(r'^abc', 'abc\nedf'))
		print('*' * 80)
		# MULTILINE: "^" also anchors at the beginning of every line.
		print(re.findall(r'^abc', 'abc\nabc', re.MULTILINE))

	def re_pattern_syntax2():
		"""Contrast the ``$`` anchor with and without ``re.MULTILINE``.

		Prints the ``findall`` result of the pattern on a two-line string in
		the default mode, a separator row of 80 asterisks, and then the result
		on the same string with ``re.MULTILINE`` enabled.
		"""
		# Default mode: "$" anchors at the end of the string.
		print(re.findall(r'abc\d$', 'abc1\nabc2'))
		print('*' * 80)
		# MULTILINE: "$" also anchors at the end of every line.
		print(re.findall(r'abc\d$', 'abc1\nabc2', re.MULTILINE))

	# Script entry point; the guarded calls must be indented under the "if".
	if __name__ == '__main__':
		re_pattern_syntax1()
		re_pattern_syntax2()

	import re

	def re_pattern_syntax4():
		"""Demonstrate greedy versus non-greedy quantifiers on a tag string.

		Prints four lines: the greedy whole match, the non-greedy whole match,
		the greedy capture group, and the non-greedy capture group.
		"""
		# Quantifiers are greedy by default; a trailing "?" makes them lazy.
		s = '<H1>title</H1>'
		print(re.match(r'<.+>', s).group())   # greedy: as much as possible
		print(re.match(r'<.+?>', s).group())  # non-greedy: as little as possible
		print(re.match(r'<(.+)>', s).group(1))
		print(re.match(r'<(.+?)>', s).group(1))

	def re_pattern_syntax5():
		"""Demonstrate greedy versus non-greedy bounded repetition.

		Prints the match of the greedy range quantifier, the match of its
		non-greedy variant, and finally a separator row of 80 asterisks.
		"""
		# Range quantifier forms: {m} / {m,} / {m,n}
		print(re.match(r'ab{2,4}', 'abbbbbbb').group())   # greedy: upper bound
		print(re.match(r'ab{2,4}?', 'abbbbbbb').group())  # non-greedy: lower bound
		print('*' * 80)

	# Script entry point; the guarded calls must be indented under the "if".
	if __name__ == '__main__':
		re_pattern_syntax4()
		re_pattern_syntax5()

	import re

	def re_pattern_flags():
		"""Demonstrate case-insensitive matching with ``re.IGNORECASE``.

		Prints the tuple of captured groups for a "key : value" style string
		whose key differs in case from the pattern, followed by a separator
		row of 80 asterisks.
		"""
		# re.I is the short alias of re.IGNORECASE. The "*" quantifiers after
		# each whitespace class make the spaces around ":" optional.
		print(re.match(r'(Name)\s*:\s*(\w+)', 'NAME : Joey', re.IGNORECASE).groups())
		print('*' * 80)

	# Script entry point. The function defined in this snippet is
	# re_pattern_flags; the previously called name was never defined.
	if __name__ == '__main__':
		re_pattern_flags()

	import re

	def re_pattern_flags1():
		"""Demonstrate ``re.VERBOSE`` with a commented, multi-line pattern.

		Compiles a pattern whose inline comments label the integer part, the
		optional decimal point and the optional fractional digits, then prints
		the first match twice (once through ``re.search`` with the compiled
		pattern, once through the pattern's own ``search`` method), followed
		by a separator row of 80 asterisks.
		"""
		s = 'the number is 20.5'
		# With VERBOSE, whitespace and "#" comments in the pattern are ignored.
		r = re.compile(r'''
	\d+ # 整数部分
	\.? # 小数点，可能包含也可能不包含
	\d* # 小数部分,可选
	''',re.VERBOSE)
		print(re.search(r, s).group())
		print(r.search(s).group())
		print('*' * 80)

	# Script entry point. The function defined in this snippet is
	# re_pattern_flags1; the previously called name was never defined.
	if __name__ == '__main__':
		re_pattern_flags1()