前言

之前对正则的理解一直是半懂不懂，现在和houdunren学一波正则。本篇是js正则相关的案例与细节学习笔记

十分建议配套 Quokka 使用，它是 VS Code 中一个可以实时显示代码运行结果的插件，非常方便。

学习笔记GitHub仓库:传送门

取出数字

// Extract the digits from a string.
// Sample input: jojo123jojo321
const test = "jojo123jojo321";

// Plain approach: split into characters and keep the ones that parse as a number.
// The radix is passed explicitly so the result never depends on parseInt's
// prefix-based base detection.
const num = [...test].filter((ch) => !Number.isNaN(Number.parseInt(ch, 10)));
console.log(num.join(''));

// Regex approach: \d matches one digit, the g flag collects every occurrence.
// match() returns null when nothing matches, so fall back to an empty list.
console.log((test.match(/\d/g) || []).join(''));

字面量创建正则/存放变量

// Creating a regex with literal syntax
const test = "jojo123jojo321";
const hasLiteralO = /o/.test(test);
console.log(hasLiteralO);

// Using a variable inside a pattern.
// A regex literal cannot interpolate a variable. Building the literal's source
// as a string and passing it to eval() would work, but eval executes arbitrary
// code if the variable ever holds untrusted text, so the RegExp constructor is
// used to build the pattern from the variable instead.
const useO = 'o';
const hasDynamicO = new RegExp(useO).test(test);
console.log(hasDynamicO);

对象创建正则

// Build the pattern from a string; the second argument carries the flags.
let test = "jojolin";
const a = "lin";
const reg = new RegExp(a, "g"); // "g" means global
// Does the text contain the pattern?
const containsPattern = reg.test(test);
console.log(containsPattern);


// replace() also accepts a function; its return value becomes the replacement.
const toDio = () => 'dio';
test = test.replace(/\w/g, toDio);
console.log(test);

选择符

const test = "jojolin";
// "|" is alternation: either side may match.
const hasJOrK = /j|k/.test(test);
console.log(hasJOrK);

const tel = '010-9999999';
const tel2 = '010';

// Area code 010 or 020, a dash, then 7 to 8 digits.
for (const candidate of [tel, tel2]) {
  console.log(/(010|020)\-\d{7,8}/.test(candidate));
}

原子表和原子组

// Character classes vs. groups
const reg = /[123456]/;
const hd = '123123';
// A character class [] matches exactly one character out of those listed.
const classMatch = hd.match(reg);
console.log(classMatch);

// A group () treats its contents as one unit:
// here "12" as a whole, or "34" as a whole.
const reg2 = /(12|34)/;
const groupMatch = hd.match(reg2);
console.log(groupMatch);

正则表达式中的转义

// Escaping inside regular expressions

const price = 23.34;

// "." has two meanings. As a metacharacter (which takes precedence) it matches
// any character except a line break; escaped, it is an ordinary dot.
// So the dot has to be escaped here.
const decimalPattern = /\d+\.\d+/;
console.log(decimalPattern.test(price));

// With the RegExp constructor the pattern is a string, so every backslash needs
// one more level of escaping. Print the string first to check that it reads the
// same as the literal form.
const str = "\\d+\\.\\d+";
console.log(str);
const reg = new RegExp(str);
const constructedResult = reg.test(price);
console.log(constructedResult);


// Check whether a URL looks like a valid site address
const url = 'https://www.baidu.com';
const urlPattern = /^https?:\/\/\w+\.\w+\.\w+/;
console.log(urlPattern.test(url));

字符边界约束

// Boundary anchors
const test = "123";
// ^ anchors the start, $ anchors the end, \d matches a single digit.
const singleDigit = /^\d$/;
console.log(singleDigit.test(test));
// Pitfall: \d alone matches exactly one digit, so \d+ is needed here.
const onlyDigits = /^\d+$/;
console.log(onlyDigits.test(test));


// Example: accept only strings made of 3 to 6 lowercase letters and nothing else.
const reg  = /^[a-z]{3,6}$/;
const samples = ['', 'a', 'ab', 'abc', 'abcd', 'abcde', 'abcdef', 'abcdefg'];
for (const sample of samples) {
  console.log(sample.match(reg));
}

// Takeaway: to constrain the whole string, use both the start and end anchors.
// Also a reminder that a character class picks one character out of many.

数值与空白元字符

// Digit and whitespace metacharacters

// Goal: pull out the Chinese names by excluding every other kind of character.
const hd = `
    张三:010-9999999,李四:020-99999999
`;

// \d matches one digit 0-9; \d+ matches a run of such digits.

// \D is the opposite: it matches any non-digit.

// Reminder: to pull data out with a regex, match() returns the matches as an
// array, replace() strips what is not wanted, and exec() walks through the
// matches one at a time.

// Matching Chinese directly has not been covered yet, so everything that is
// not Chinese is excluded instead, which leaves the Chinese text.

// \s matches whitespace; line breaks such as \n count as whitespace too.
// Inside a character class a leading ^ negates it: none of the listed characters.
// The hyphen is escaped on purpose: an unescaped "-" between ":" and "\s" reads
// as a range with a class escape as its endpoint, which is only accepted by
// legacy parsing and is a SyntaxError once the u flag is used.
const names = hd.match(/[^\d,:\-\s]+/g);
console.log(names);

w与W元字符

// The \w and \W metacharacters

// \w matches letters, digits and underscore, so it already covers \d.

// Simple e-mail matching
const email = '9813123123@qq.com';
const emailMatch = email.match(/^\w+@\w+\.\w+$/);
console.log(emailMatch);

点元字符的使用

// Using the dot metacharacter
// "." matches anything except a line break, so it stops at the first newline.
// That is not enough for a multi-line template string like the one below.
// The s flag treats the input as a single line, so "." matches line breaks too.
const hd = `
    jojo
    jojolin
`;
const firstLine = hd.match(/.+/)[0];
console.log(firstLine);
const wholeText = hd.match(/.+/s);
console.log(wholeText);

匹配所有字符

// Matching every character
const hd = `
    <span>
        jojolin @abc
        @@@cajsocj    
    </span>
`;
// [\s\S] stands for any character at all; [\d\D] works the same way.
const spanPattern = /<span>[\s\S]+<\/span>/;
console.log(hd.match(spanPattern));

模式修正符gi

// Pattern flags

// i ignores case, g matches globally, and gi combines both.
const text = 'jojoJOJOJO';

const allOs = text.match(/o/gi);
console.log(allOs);

// Replacement
const masked = text.replace(/o/gi, '?');
console.log(masked);

模式修正符m

const hd = `
    #1 js,200元 #
    #2 php,300元 #
    #55 houdunren.com # 后盾人
    #56 nodejs,180元 #
`;
// The m flag makes ^ and $ apply to every line of a multi-line input.
// Without m (and without anchors), ".+" runs up to the "#" closing a line and
// "\s+#" then reaches the "#" on the next line, which is clearly not wanted.
const unanchored = hd.match(/\s*#\d+\s+.+\s+#/g);
console.log(unanchored);
// With m and anchors
const perLine = hd.match(/^\s*#\d+\s+.+\s+#$/gm);
console.log(perLine);
const lessons = hd.match(/^\s*#\d+\s+.+\s+#$/gm).map((line) => {
  // Drop the leading "#<number> " marker and the trailing " #".
  const body = line.replace(/^\s*#\d+\s+/, "").replace(/\s#$/, "");
  console.log(body);
  const parts = body.split(',');
  return { name: parts[0], price: parts[1] };
});

console.log(JSON.stringify(lessons, null, 2));

模式修正符u

// The u flag (Unicode)
// Every character carries Unicode properties that can be used to classify it.

// A few examples of property escapes:

const text = 'jojostoneocean2022.,}[com笑死我了哈哈哈';

// Characters whose property is "letter" (the Chinese characters are matched as well)
const letters = text.match(/\p{L}/gu);
console.log(letters);

// Punctuation
const punctuation = text.match(/\p{P}/gu);
console.log(punctuation);


// Chinese (Han script)
const hanChars = text.match(/\p{sc=Han}/gu);
console.log(hanChars);

lastIndex属性的使用

// Using the lastIndex property

/**
 * Logs regex.lastIndex before each exec() call and once more after the last one.
 *
 * @param {RegExp} regex - pattern to run; exec() updates its lastIndex when it has the g flag
 * @param {string} input - text to search
 * @param {number} times - how many exec() calls to make
 */
function logExecSteps(regex, input, times) {
  for (let step = 0; step < times; step += 1) {
    console.log(regex.lastIndex);
    console.log(regex.exec(input));
  }
  console.log(regex.lastIndex);
}

const text = 'jojo';
// Without g, match() returns the match together with its details (index, input, ...).
console.log(text.match(/\w/));

// With g, match() returns only the matched strings and those details are lost.
console.log(text.match(/\w/g));

// Using exec()
const reg = /\w/g;
logExecSteps(reg, text, 4);

// A global regex keeps a lastIndex. After each successful exec() it is set to
// the end of that match (one further for this single-character pattern), until
// the input is exhausted.
const reg2 = /\w/g;

// So exec() can drive a loop.
// `res` is declared explicitly: assigning to an undeclared name creates an
// implicit global and throws a ReferenceError in strict mode / ES modules.
let res;
while ((res = reg2.exec(text)) !== null) {
  console.log(res);
}
console.log(res);

// Note: exec() only steps through the input when the regex has the g flag.
// Without it, lastIndex stays at 0 and the first match is returned every time.
const reg3 = /\w/;
logExecSteps(reg3, text, 4);


// Note: once `reg` has run past the last match, exec() returns null, lastIndex
// goes back to 0, and the search starts over from the beginning.
logExecSteps(reg, text, 4);

// Summary: exec() on a global regex behaves like an iterator that can restart.

有效率的y模式

// The efficient y (sticky) flag

// An example first

const text = 'asdasdasdas的,213123123,33333,asdasd';
const reg = /\d/g;

for (let call = 0; call < 7; call += 1) {
  console.log(reg.exec(text));
}

// The g flag keeps scanning through the rest of the string. If only one run of
// consecutive digits is needed and nothing after it is relevant, continuing to
// scan with g is wasted work.

// That is what the y flag is for: it matches only at lastIndex, and as soon as
// a match fails there it returns null instead of searching further.
const reg2 = /(\d+,?)/y;
reg2.lastIndex = 13; // index of the first digit in `text`
const stickyMatches = [];
// `res` is declared explicitly: assigning to an undeclared name creates an
// implicit global and throws a ReferenceError in strict mode / ES modules.
let res;
while ((res = reg2.exec(text)) !== null) {
  console.log(res);
  stickyMatches.push(res[0]);
}

// The drawback: y depends on lastIndex, so the start position must be known up front.

原子表原子组新使用

// Character classes and groups: a further use

// A concrete example

const date = '2022-01-22';

// Requirement: the date may be joined with "-" or with "/", but both
// separators must be the same.

const date2 = '2022/01/22';
const date3 = '2022-01/22';

// Note that "/" has to be escaped here.
const reg = /^\d{4}[-\/]\d{2}[-\/]\d{2}/;
for (const sample of [date, date2, date3]) {
  console.log(reg.exec(sample));
}

// The output shows that date3, with mixed separators, is matched as well.

// Rewrite the pattern with a group around the character class.
const reg2 = /^\d{4}([-\/])\d{2}\1\d{2}/;

for (const sample of [date, date2, date3]) {
  console.log(reg2.exec(sample));
}

// How it works: the group captures what it matched, and \1 later requires that
// same text again, so only identical separators satisfy the pattern.

区间匹配

// Range matching

// Ranges were already used earlier. The point to remember is that a range
// must be ascending; a descending range is not allowed.
const text = '213123jojoj';

const digits = text.match(/[0-9]/g);
console.log(digits);
// console.log(text.match(/[9-0]/g));

排除匹配

// Negated matching

const text = '张三:010-9999999;李四:020-9999999';
// Keep every run of characters that is not a digit, "-", ":" or ";".
const names = text.match(/[^\d\-:;]+/g);
console.log(names);

原子表字符不解析

// Characters are not interpreted inside a character class

const text = '(jo-jo).+';
// Parentheses only form a group outside a class; inside one they are plain parentheses.
const parens = text.match(/[()]/g);
console.log(parens);
// "." and "+" are also literal inside a class, so they need no escaping.
const dotPlus = text.match(/[.+]/g);
console.log(dotPlus);
// "-" has two meanings: the hyphen itself, or a range separator.
// It is normally escaped to make sure it is read as a literal hyphen.
const hyphenPatterns = [/[-]/g, /[\-]/g, /[j\-]/g, /[a-z]/g];
for (const pattern of hyphenPatterns) {
  console.log(text.match(pattern));
}

案例删除dom元素

// Removing DOM elements

// Example inputs
const dom = `
    <h1>
        jojo
    </h1>
`;
const dom2 = `
    <H3>
    </H3>
`;

// Match an h1-h6 element whose closing tag repeats the captured opening tag
// name (\1); the i flag accepts upper-case tags as well.
const reg = /<(h[1-6])>[\s\S]*<\/\1>/gi;
for (const markup of [dom, dom2]) {
  console.log(markup.replace(reg, ''));
}

邮箱验证中原子组的使用

// Using groups for e-mail validation

const test1 = '123131313@qq.com';
const test2 = '123131313@sina.com.cn';
const test3 = 'jojo-123@qq.com';
const addresses = [test1, test2, test3];

// The second address fails with this pattern, so it has to be refined.
for (const address of addresses) {
  console.log(address.match(/^[\w-]+@\w+\.(com|cn|org)$/));
}

// (\w+\.)+ allows any number of "label." segments, but the address still has
// to end in com, cn or org.
console.log(test2.match(/^[\w-]+@(\w+\.)+(com|cn|org)$/));

// Check all three
for (const address of addresses) {
  console.log(address.match(/^[\w-]+@(\w+\.)+(com|cn|org)$/));
}

原子组引用完成替换操作

// Replacing with group references
const text = `
    <h1>jojo</h1>
    <span>jojolin</span>
    <h3>jojostoneocean</h3>
`;

const reg = /<(h[1-6])>([\s\S]+)<\/\1>/g;
const headings = text.match(reg);
console.log(headings);

// $2 inserts what the second pair of parentheses captured in place of the
// whole match. To find a group's number, count the opening parentheses:
// the first one is 1, and so on.
const asParagraphs = text.replace(reg, "<p>$2</p>");
console.log(asParagraphs);

// With a function, replace() passes the whole match first and then each
// captured group; here they are only logged and the result is discarded.
text.replace(reg, (whole, tagName, inner) => {
  for (const piece of [whole, tagName, inner]) {
    console.log(piece);
  }
});