This commit is contained in:
parent
b63febdd66
commit
37235432bc
2
es/es.go
2
es/es.go
|
@ -26,7 +26,7 @@ func NewElasticSearch() (es *ElasticSearch, err error) {
|
|||
}
|
||||
|
||||
const (
|
||||
IndexStreamerAcct = "streamer_acct"
|
||||
IndexStreamerAcct = "new_streamer_acct"
|
||||
TypeStreamerAcct = "_doc"
|
||||
)
|
||||
|
||||
|
|
|
@ -9,9 +9,11 @@ import (
|
|||
goproto "google.golang.org/protobuf/proto"
|
||||
)
|
||||
|
||||
// NOTE(review): this connection string embeds a username and password in
// plain text in source control. Presumably it should be loaded from
// configuration or a secret store, and the committed credential rotated —
// confirm with the service owner.
var url = "mongodb://root:Wishpal2024@dds-bp1da1ddd62bede41.mongodb.rds.aliyuncs.com:3717,dds-bp1da1ddd62bede42.mongodb.rds.aliyuncs.com:3717/admin?replicaSet=mgset-77304659"
|
||||
|
||||
func NewMongoClient() (*qmgo.Client, error) {
|
||||
clientCfg := &qmgo.Config{
|
||||
Uri: "mongodb://root:Wishpal2024@dds-bp1da1ddd62bede41.mongodb.rds.aliyuncs.com:3717,dds-bp1da1ddd62bede42.mongodb.rds.aliyuncs.com:3717/admin?replicaSet=mgset-77304659",
|
||||
Uri: "mongodb://admin:Wishpal%402023@172.31.37.71:27017",
|
||||
ConnectTimeoutMS: goproto.Int64(30000),
|
||||
MaxPoolSize: goproto.Uint64(16),
|
||||
MinPoolSize: goproto.Uint64(0),
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
# EditorConfig is awesome: http://EditorConfig.org
|
||||
|
||||
# top-most EditorConfig file
|
||||
root = true
|
||||
|
||||
# Unix-style newlines with a newline ending every file
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
insert_final_newline = true
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
# Indentation
|
||||
[*.{py,rst}]
|
||||
indent_style = space
|
||||
indent_size = 4
|
||||
[{Makefile,*.go}]
|
||||
indent_style = tab
|
||||
indent_size = 4
|
||||
[*.{ini,yml}]
|
||||
indent_style = space
|
||||
indent_size = 2
|
|
@ -0,0 +1,26 @@
|
|||
# Compiled Object files, Static and Dynamic libs (Shared Objects)
|
||||
*.o
|
||||
*.a
|
||||
*.so
|
||||
|
||||
# Folders
|
||||
_obj
|
||||
_test
|
||||
|
||||
# Architecture specific extensions/prefixes
|
||||
*.[568vq]
|
||||
[568vq].out
|
||||
|
||||
*.cgo1.go
|
||||
*.cgo2.c
|
||||
_cgo_defun.c
|
||||
_cgo_gotypes.go
|
||||
_cgo_export.*
|
||||
|
||||
_testmain.go
|
||||
|
||||
*.exe
|
||||
*.test
|
||||
*.prof
|
||||
*~
|
||||
*.swp
|
|
@ -0,0 +1,3 @@
|
|||
[submodule "_tools/pinyin-data"]
|
||||
path = _tools/pinyin-data
|
||||
url = https://github.com/mozillazg/pinyin-data.git
|
|
@ -0,0 +1,248 @@
|
|||
# Changelog
|
||||
|
||||
## [0.20.0] (2023-05-14)
|
||||
|
||||
* **Changed** 使用 [pinyin-data][pinyin-data] v0.13.0 的拼音数据
|
||||
|
||||
|
||||
## [0.19.0] (2021-12-11)
|
||||
|
||||
* **Changed** 使用 [pinyin-data][pinyin-data] v0.12.0 的拼音数据
|
||||
|
||||
|
||||
## [0.18.0] (2020-06-13)
|
||||
|
||||
* **Changed** 使用 [pinyin-data][pinyin-data] v0.9.0 的拼音数据
|
||||
* **Bugfixed** 修复自定义的 Fallback 函数可能会导致结果乱码的问题 Fixes [#35]
|
||||
|
||||
## [0.17.0] (2020-04-09)
|
||||
|
||||
* **Changed** 因为依赖的 gojieba 经常出现安装异常,撤销 v0.16.0 的修改,撤销后 v0.17.0 的代码跟 v0.15.0 基本是一样的。
|
||||
如果有需要使用 v0.16.0 新增的 ``func Paragraph(p string) string`` 功能的请使用 v0.16.0 版本或者通过 v0.16.0 中相关代码实现类似的需求。
|
||||
|
||||
## [0.16.0] (2019-12-05)
|
||||
|
||||
* **NEW** 增加 ``func Paragraph(p string) string`` 用于便捷处理大段文字
|
||||
(thanks [@huacnlee] via [#37][#37])
|
||||
|
||||
## [0.15.0] (2019-04-06)
|
||||
|
||||
* **Changed** 使用 [pinyin-data][pinyin-data] v0.7.0 的拼音数据
|
||||
* **NEW** 添加 go.mod 文件
|
||||
|
||||
|
||||
## [0.14.0] (2018-08-05)
|
||||
|
||||
* **Changed** 使用 [pinyin-data][pinyin-data] v0.6.1 的拼音数据
|
||||
* **Changed** 命令行工具移到 `cmd/pinyin/` 目录下,现在需要改为使用
|
||||
`go get -u github.com/mozillazg/go-pinyin/cmd/pinyin` 来安装命令行工具。
|
||||
|
||||
|
||||
## [0.13.0] (2018-04-29)
|
||||
|
||||
* **Changed** 使用 [pinyin-data][pinyin-data] v0.5.1 的拼音数据 (via [#30])
|
||||
* **Changed** 修改命令行工具 `-s` 参数的值(thanks [@wdscxsj][@wdscxsj] via [#19][#19]):
|
||||
* `Normal` 改为 `zhao`
|
||||
* `Tone` 改为 `zh4ao`
|
||||
* `Tone2` 改为 `zha4o`
|
||||
* `Tone3` 改为 `zhao4`
|
||||
* `Initials` 改为 `zh`
|
||||
* `FirstLetter` 改为 `z`
|
||||
* `Finals` 改为 `ao`
|
||||
* `FinalsTone` 改为 `4ao`
|
||||
* `FinalsTone2` 改为 `a4o`
|
||||
* `FinalsTone3` 改为 `ao4`
|
||||
* **Changed** 严格限制命令行参数中 `-s` 选项的值(thanks [@wdscxsj][@wdscxsj] via [#20][#20]):
|
||||
|
||||
|
||||
## [0.12.0] (2017-04-25)
|
||||
|
||||
|
||||
* **NEW** 命令行程序支持通过 -s 指定新增的 `Tone3` 和 `FinalsTone3` 拼音风格
|
||||
|
||||
$ pinyin -s Tone3 请至少输入一个汉字
|
||||
qing3 zhi4 shao3 shu1 ru4 yi1 ge4 han4 zi4
|
||||
|
||||
$ pinyin -s FinalsTone3 请至少输入一个汉字
|
||||
ing3 i4 ao3 u1 u4 i1 e4 an4 i4
|
||||
|
||||
* **Changed** use [pinyin-data](https://github.com/mozillazg/pinyin-data) v0.4.1
|
||||
|
||||
|
||||
## [0.11.0] (2016-10-28)
|
||||
|
||||
* **Changed** 不再使用 `0` 表示轻声(因为之前并没有正确的实现这个功能, 同时也觉得这个功能没必要)。
|
||||
顺便修复了 Tone2 中 `ü` 标轻声的问题(像 `侵略 -> qi1n lv0e4` )
|
||||
* **NEW** 新增 `Tone3` 和 `FinalsTone3` 拼音风格。
|
||||
|
||||
hans := "中国人"
|
||||
args := pinyin.NewArgs()
|
||||
args.Style = pinyin.Tone3
|
||||
fmt.Println("Tone3:", pinyin.Pinyin(hans, args))
|
||||
// Output: Tone3: [[zhong1] [guo2] [ren2]]
|
||||
|
||||
args.Style = pinyin.FinalsTone3
|
||||
fmt.Println("FinalsTone3:", pinyin.Pinyin(hans, args))
|
||||
// Output: FinalsTone3: [[ong1] [uo2] [en2]]
|
||||
|
||||
|
||||
|
||||
## [0.10.0] (2016-10-18)
|
||||
|
||||
* **Changed** use [pinyin-data](https://github.com/mozillazg/pinyin-data) v0.4.0
|
||||
|
||||
|
||||
## [0.9.0] (2016-09-04)
|
||||
|
||||
* **NEW** 新增 `func Convert(s string, a *Args) [][]string`
|
||||
* **NEW** 新增 `func LazyConvert(s string, a *Args) []string`
|
||||
|
||||
之所以增加这两个函数是希望 `a` 参数支持 `nil`
|
||||
|
||||
|
||||
|
||||
## [0.8.0] (2016-08-19)
|
||||
|
||||
* **Changed** use [pinyin-data](https://github.com/mozillazg/pinyin-data) v0.3.0
|
||||
* Fixed [#13](https://github.com/mozillazg/go-pinyin/issues/13) . thanks [@aisq2008](https://github.com/aisq2008)
|
||||
* Fixed pinyin of 罗
|
||||
|
||||
|
||||
## [0.7.0] (2016-08-02)
|
||||
|
||||
* **Changed** use [pinyin-data](https://github.com/mozillazg/pinyin-data) v0.2.0
|
||||
* **Improved** golint and gofmt
|
||||
|
||||
|
||||
## [0.6.0] (2016-05-14)
|
||||
|
||||
* **NEW** 命令行程序支持指定拼音风格:
|
||||
|
||||
```shell
|
||||
$ pinyin -s Normal 你好
|
||||
ni hao
|
||||
```
|
||||
* **Bugfixed** 解决韵母 i, u, ü 的问题:根据以下拼音方案,还原出正确的韵母
|
||||
[#8](https://github.com/mozillazg/go-pinyin/pull/8), [python-pinyin#26](https://github.com/mozillazg/python-pinyin/pull/26)
|
||||
|
||||
> i 行的韵母,前面没有声母的时候,写成:yi(衣),yɑ(呀),ye(耶),
|
||||
> yɑo(腰),you(忧),yɑn(烟),yin(因),yɑnɡ(央),yinɡ(英),yonɡ(雍)。
|
||||
>
|
||||
> u 行的韵母,前面没有声母的时候,写成wu(乌),wɑ(蛙),wo(窝),
|
||||
> wɑi(歪),wei(威),wɑn(弯),wen(温),wɑnɡ(汪),wenɡ(翁)。
|
||||
>
|
||||
> ü行的韵母跟声母j,q,x拼的时候,写成ju(居),qu(区),xu(虚),
|
||||
> ü上两点也省略;但是跟声母l,n拼的时候,仍然写成lü(吕),nü(女)。
|
||||
|
||||
**注意** `y` 既不是声母也不是韵母。详见 [汉语拼音方案](http://www.edu.cn/20011114/3009777.shtml)
|
||||
|
||||
* **Bugfixed** 解决未正确处理鼻音 ḿ, ń, ň, ǹ 的问题:包含鼻音的拼音不应该有声母
|
||||
|
||||
|
||||
|
||||
## [0.5.0] (2016-03-12)
|
||||
|
||||
* **CHANGE** 改为使用来自 [pinyin-data](https://github.com/mozillazg/pinyin-data) 的拼音数据。
|
||||
* **NEW** 命令行程序支持从标准输入读取数据(支持管道和重定向输入):
|
||||
|
||||
```shell
|
||||
$ echo "你好" | pinyin
|
||||
nǐ hǎo
|
||||
$ pinyin < hello.txt
|
||||
nǐ hǎo
|
||||
```
|
||||
|
||||
|
||||
## [0.4.0] (2016-01-29)
|
||||
|
||||
* **NEW** `Args` 结构体新增 field: `Fallback func(r rune, a Args) []string`
|
||||
用于处理没有拼音的字符(默认忽略没有拼音的字符):
|
||||
```go
|
||||
a := pinyin.NewArgs()
|
||||
a.Fallback = func(r rune, a pinyin.Args) []string {
|
||||
return []string{string(r + 1)}
|
||||
}
|
||||
fmt.Println(pinyin.Pinyin("中国人abc", a))
|
||||
// Output: [[zhong] [guo] [ren] [b] [c] [d]]
|
||||
|
||||
// or
|
||||
pinyin.Fallback = func(r rune, a pinyin.Args) []string {
|
||||
return []string{string(r)}
|
||||
}
|
||||
fmt.Println(pinyin.Pinyin("中国人abc", pinyin.NewArgs()))
|
||||
// Output: [[zhong] [guo] [ren] [a] [b] [c]]
|
||||
```
|
||||
|
||||
|
||||
## [0.3.0] (2015-12-29)
|
||||
|
||||
* fix "当字符串中有非中文的时候,会出现下标越界的情况"(影响 `pinyin.LazyPinyin` 和 `pinyin.Slug` ([#1](https://github.com/mozillazg/go-pinyin/issues/1)))
|
||||
* 调整对非中文字符的处理:当遇到没有拼音的字符时,直接忽略
|
||||
```go
|
||||
// before
|
||||
fmt.Println(pinyin.Pinyin("中国人abc", pinyin.NewArgs()))
|
||||
[[zhong] [guo] [ren] [] [] []]
|
||||
|
||||
// after
|
||||
fmt.Println(pinyin.Pinyin("中国人abc", pinyin.NewArgs()))
|
||||
[[zhong] [guo] [ren]]
|
||||
```
|
||||
|
||||
|
||||
## [0.2.1] (2015-08-26)
|
||||
|
||||
* `yu`, `y`, `w` 不是声母
|
||||
|
||||
|
||||
## [0.2.0] (2015-01-04)
|
||||
|
||||
* 新增 `func NewArgs() Args`
|
||||
* 解决 `Args.Separator` 无法赋值为 `""` 的 BUG
|
||||
* 规范命名:
|
||||
* `NORMAL` -> `Normal`
|
||||
* `TONE` -> `Tone`
|
||||
* `TONE2` -> `Tone2`
|
||||
* `INITIALS` -> `Initials`
|
||||
* `FIRST_LETTER` -> `FirstLetter`
|
||||
* `FINALS` -> `Finals`
|
||||
* `FINALS_TONE` -> `FinalsTone`
|
||||
* `FINALS_TONE2` -> `FinalsTone2`
|
||||
|
||||
## [0.1.1] (2014-12-07)
|
||||
* 更新拼音库
|
||||
|
||||
|
||||
## 0.1.0 (2014-11-23)
|
||||
* Initial Release
|
||||
|
||||
|
||||
[pinyin-data]: https://github.com/mozillazg/pinyin-data
|
||||
[@wdscxsj]: https://github.com/wdscxsj
|
||||
[@huacnlee]: https://github.com/huacnlee
|
||||
[#19]: https://github.com/mozillazg/go-pinyin/pull/19
|
||||
[#20]: https://github.com/mozillazg/go-pinyin/pull/20
|
||||
[#30]: https://github.com/mozillazg/go-pinyin/pull/30
|
||||
[#37]: https://github.com/mozillazg/go-pinyin/pull/37
|
||||
[#35]: https://github.com/mozillazg/go-pinyin/issues/35
|
||||
|
||||
[0.1.1]: https://github.com/mozillazg/go-pinyin/compare/v0.1.0...v0.1.1
|
||||
[0.2.0]: https://github.com/mozillazg/go-pinyin/compare/v0.1.1...v0.2.0
|
||||
[0.2.1]: https://github.com/mozillazg/go-pinyin/compare/v0.2.0...v0.2.1
|
||||
[0.3.0]: https://github.com/mozillazg/go-pinyin/compare/v0.2.1...v0.3.0
|
||||
[0.4.0]: https://github.com/mozillazg/go-pinyin/compare/v0.3.0...v0.4.0
|
||||
[0.5.0]: https://github.com/mozillazg/go-pinyin/compare/v0.4.0...v0.5.0
|
||||
[0.6.0]: https://github.com/mozillazg/go-pinyin/compare/v0.5.0...v0.6.0
|
||||
[0.7.0]: https://github.com/mozillazg/go-pinyin/compare/v0.6.0...v0.7.0
|
||||
[0.8.0]: https://github.com/mozillazg/go-pinyin/compare/v0.7.0...v0.8.0
|
||||
[0.9.0]: https://github.com/mozillazg/go-pinyin/compare/v0.8.0...v0.9.0
|
||||
[0.10.0]: https://github.com/mozillazg/go-pinyin/compare/v0.9.0...v0.10.0
|
||||
[0.11.0]: https://github.com/mozillazg/go-pinyin/compare/v0.10.0...v0.11.0
|
||||
[0.12.0]: https://github.com/mozillazg/go-pinyin/compare/v0.11.0...v0.12.0
|
||||
[0.13.0]: https://github.com/mozillazg/go-pinyin/compare/v0.12.0...v0.13.0
|
||||
[0.14.0]: https://github.com/mozillazg/go-pinyin/compare/v0.13.0...v0.14.0
|
||||
[0.15.0]: https://github.com/mozillazg/go-pinyin/compare/v0.14.0...v0.15.0
|
||||
[0.16.0]: https://github.com/mozillazg/go-pinyin/compare/v0.15.0...v0.16.0
|
||||
[0.17.0]: https://github.com/mozillazg/go-pinyin/compare/v0.16.0...v0.17.0
|
||||
[0.18.0]: https://github.com/mozillazg/go-pinyin/compare/v0.17.0...v0.18.0
|
||||
[0.19.0]: https://github.com/mozillazg/go-pinyin/compare/v0.18.0...v0.19.0
|
||||
[0.20.0]: https://github.com/mozillazg/go-pinyin/compare/v0.19.0...v0.20.0
|
|
@ -0,0 +1,22 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2016 mozillazg
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
help:
|
||||
@echo "test run test"
|
||||
@echo "lint run lint"
|
||||
@echo "gen_pinyin_dict gen pinyin dict"
|
||||
|
||||
.PHONY: test
|
||||
test:
|
||||
@echo "run test"
|
||||
@go test -v -cover
|
||||
|
||||
.PHONY: gen_pinyin_dict
|
||||
gen_pinyin_dict:
|
||||
@go run _tools/gen_pinyin_dict.go _tools/pinyin-data/pinyin.txt pinyin_dict.go
|
||||
|
||||
.PHONY: lint
|
||||
lint:
|
||||
gofmt -s -w . cmd/pinyin _tools
|
||||
golint .
|
||||
golint cmd/pinyin
|
||||
golint _tools
|
|
@ -0,0 +1,115 @@
|
|||
go-pinyin
|
||||
=========
|
||||
|
||||
[![Build Status](https://github.com/mozillazg/go-pinyin/actions/workflows/ci.yml/badge.svg?branch=master)](https://github.com/mozillazg/go-pinyin/actions/workflows/ci.yml)
|
||||
[![Coverage Status](https://coveralls.io/repos/mozillazg/go-pinyin/badge.svg?branch=master)](https://coveralls.io/r/mozillazg/go-pinyin?branch=master)
|
||||
[![Go Report Card](https://goreportcard.com/badge/github.com/mozillazg/go-pinyin)](https://goreportcard.com/report/github.com/mozillazg/go-pinyin)
|
||||
[![GoDoc](https://godoc.org/github.com/mozillazg/go-pinyin?status.svg)](https://godoc.org/github.com/mozillazg/go-pinyin)
|
||||
|
||||
汉语拼音转换工具 Go 版。
|
||||
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
```
|
||||
go get github.com/mozillazg/go-pinyin
|
||||
```
|
||||
|
||||
install CLI tool:
|
||||
|
||||
```
|
||||
# go version>=1.17
|
||||
go install github.com/mozillazg/go-pinyin/cli/pinyin@latest
|
||||
|
||||
# go version<1.17
|
||||
go get -u github.com/mozillazg/go-pinyin/cli/pinyin
|
||||
|
||||
$ pinyin 中国人
|
||||
zhōng guó rén
|
||||
```
|
||||
|
||||
|
||||
Documentation
|
||||
--------------
|
||||
|
||||
API documentation can be found here:
|
||||
https://godoc.org/github.com/mozillazg/go-pinyin
|
||||
|
||||
|
||||
Usage
|
||||
------
|
||||
|
||||
```go
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/mozillazg/go-pinyin"
|
||||
)
|
||||
|
||||
func main() {
|
||||
hans := "中国人"
|
||||
|
||||
// 默认
|
||||
a := pinyin.NewArgs()
|
||||
fmt.Println(pinyin.Pinyin(hans, a))
|
||||
// [[zhong] [guo] [ren]]
|
||||
|
||||
// 包含声调
|
||||
a.Style = pinyin.Tone
|
||||
fmt.Println(pinyin.Pinyin(hans, a))
|
||||
// [[zhōng] [guó] [rén]]
|
||||
|
||||
// 声调用数字表示
|
||||
a.Style = pinyin.Tone2
|
||||
fmt.Println(pinyin.Pinyin(hans, a))
|
||||
// [[zho1ng] [guo2] [re2n]]
|
||||
|
||||
// 开启多音字模式
|
||||
a = pinyin.NewArgs()
|
||||
a.Heteronym = true
|
||||
fmt.Println(pinyin.Pinyin(hans, a))
|
||||
// [[zhong zhong] [guo] [ren]]
|
||||
a.Style = pinyin.Tone2
|
||||
fmt.Println(pinyin.Pinyin(hans, a))
|
||||
// [[zho1ng zho4ng] [guo2] [re2n]]
|
||||
|
||||
fmt.Println(pinyin.LazyPinyin(hans, pinyin.NewArgs()))
|
||||
// [zhong guo ren]
|
||||
|
||||
fmt.Println(pinyin.Convert(hans, nil))
|
||||
// [[zhong] [guo] [ren]]
|
||||
|
||||
fmt.Println(pinyin.LazyConvert(hans, nil))
|
||||
// [zhong guo ren]
|
||||
}
|
||||
```
|
||||
|
||||
注意:
|
||||
|
||||
* 默认情况下会忽略没有拼音的字符(可以通过自定义 `Fallback` 参数的值来自定义如何处理没有拼音的字符,
|
||||
详见 [示例](https://godoc.org/github.com/mozillazg/go-pinyin#example-Pinyin--FallbackCustom1))。
|
||||
* 根据 [《汉语拼音方案》](http://www.moe.gov.cn/s78/A19/yxs_left/moe_810/s230/195802/t19580201_186000.html) y,w,ü (yu) 都不是声母,
|
||||
以及不是所有拼音都有声母,如果这不是你预期的话,你可能需要的是首字母风格 `FirstLetter`
|
||||
( [详细信息](https://github.com/mozillazg/python-pinyin#%E4%B8%BA%E4%BB%80%E4%B9%88%E6%B2%A1%E6%9C%89-y-w-yu-%E5%87%A0%E4%B8%AA%E5%A3%B0%E6%AF%8D) )。
|
||||
|
||||
|
||||
Related Projects
|
||||
-----------------
|
||||
|
||||
* [hotoo/pinyin](https://github.com/hotoo/pinyin): 汉语拼音转换工具 Node.js/JavaScript 版。
|
||||
* [mozillazg/python-pinyin](https://github.com/mozillazg/python-pinyin): 汉语拼音转换工具 Python 版。
|
||||
* [mozillazg/rust-pinyin](https://github.com/mozillazg/rust-pinyin): 汉语拼音转换工具 Rust 版。
|
||||
|
||||
|
||||
pinyin data
|
||||
-----------------
|
||||
|
||||
* 使用 [pinyin-data](https://github.com/mozillazg/pinyin-data) 的拼音数据
|
||||
|
||||
|
||||
License
|
||||
---------
|
||||
|
||||
Under the MIT License.
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
Package pinyin : 汉语拼音转换工具.
|
||||
|
||||
Usage
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"github.com/mozillazg/go-pinyin"
|
||||
)
|
||||
|
||||
func main() {
|
||||
hans := "中国人"
|
||||
// 默认
|
||||
a := pinyin.NewArgs()
|
||||
fmt.Println(pinyin.Pinyin(hans, a))
|
||||
// [[zhong] [guo] [ren]]
|
||||
|
||||
// 包含声调
|
||||
a.Style = pinyin.Tone
|
||||
fmt.Println(pinyin.Pinyin(hans, a))
|
||||
// [[zhōng] [guó] [rén]]
|
||||
|
||||
// 声调用数字表示
|
||||
a.Style = pinyin.Tone2
|
||||
fmt.Println(pinyin.Pinyin(hans, a))
|
||||
// [[zho1ng] [guo2] [re2n]]
|
||||
|
||||
// 开启多音字模式
|
||||
a = pinyin.NewArgs()
|
||||
a.Heteronym = true
|
||||
fmt.Println(pinyin.Pinyin(hans, a))
|
||||
// [[zhong zhong] [guo] [ren]]
|
||||
a.Style = pinyin.Tone2
|
||||
fmt.Println(pinyin.Pinyin(hans, a))
|
||||
// [[zho1ng zho4ng] [guo2] [re2n]]
|
||||
}
|
||||
*/
|
||||
package pinyin
|
|
@ -0,0 +1,33 @@
|
|||
package pinyin
|
||||
|
||||
// phoneticSymbol maps every tone-marked pinyin character to its plain letter
// followed by the tone digit (1-4), for example "ā" -> "a1". The untoned "ü"
// maps to the bare letter "v".
var phoneticSymbol = map[string]string{
	"ā": "a1",
	"á": "a2",
	"ǎ": "a3",
	"à": "a4",
	"ē": "e1",
	"é": "e2",
	"ě": "e3",
	"è": "e4",
	"ō": "o1",
	"ó": "o2",
	"ǒ": "o3",
	"ò": "o4",
	"ī": "i1",
	"í": "i2",
	"ǐ": "i3",
	"ì": "i4",
	"ū": "u1",
	"ú": "u2",
	"ǔ": "u3",
	"ù": "u4",
	"ü": "v",
	// First-tone ü. The table previously had only the 2nd-4th tones, so a
	// syllable containing "ǖ" kept its tone mark in the toneless and
	// digit-based styles.
	"ǖ": "v1",
	"ǘ": "v2",
	"ǚ": "v3",
	"ǜ": "v4",
	"ń": "n2",
	"ň": "n3",
	"ǹ": "n4",
	"ḿ": "m2",
}
|
|
@ -0,0 +1,275 @@
|
|||
package pinyin
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Package metadata.
const (
	// Version is the version string of this package.
	Version = "0.20.0"
	// Author is the author credit string.
	Author = "mozillazg, 闲耘"
	// License is the name of the license the package is distributed under.
	License = "MIT"
	// Copyright is the copyright notice.
	Copyright = "Copyright (c) 2016 mozillazg, 闲耘"
)
|
||||
|
||||
// Pinyin styles (recommended names), listed in order of their numeric value.
const (
	// Normal is the plain style without tones (the default). Example: zhong guo
	Normal = 0
	// Tone puts the tone mark on the first letter of the final. Example: zhōng guó
	Tone = 1
	// Tone2 writes the tone as a digit 1-4 after the final's vowel. Example: zho1ng guo2
	Tone2 = 2
	// Initials returns only the initial of each pinyin. Example: zh g
	// Note that not every pinyin has an initial.
	Initials = 3
	// FirstLetter returns only the first letter of each pinyin. Example: z g
	FirstLetter = 4
	// Finals returns only the final of each pinyin, without tone. Example: ong uo
	Finals = 5
	// FinalsTone returns the final with the tone mark on its first letter. Example: ōng uó
	FinalsTone = 6
	// FinalsTone2 returns the final with the tone as a digit 1-4 after the vowel. Example: o1ng uo2
	FinalsTone2 = 7
	// Tone3 writes the tone as a digit 1-4 at the end of each pinyin. Example: zhong1 guo2
	Tone3 = 8
	// FinalsTone3 returns the final with the tone as a digit 1-4 at its end. Example: ong1 uo2
	FinalsTone3 = 9
)
|
||||
|
||||
// 拼音风格(兼容之前的版本)
|
||||
const (
|
||||
NORMAL = Normal
|
||||
TONE = Tone
|
||||
TONE2 = Tone2
|
||||
INITIALS = Initials
|
||||
FIRST_LETTER = FirstLetter
|
||||
FINALS = Finals
|
||||
FINALS_TONE = FinalsTone
|
||||
FINALS_TONE2 = FinalsTone2
|
||||
)
|
||||
|
||||
// initialArray is the table of pinyin initials. The two-letter initials
// zh, ch and sh are listed before z, c and s, so a first-match prefix scan
// over this slice finds the longer initial first.
var initialArray = strings.Fields("b p m f d t n l g k h j q x r zh ch sh z c s")
|
||||
|
||||
// 所有带声调的字符
|
||||
var rePhoneticSymbolSource = func(m map[string]string) string {
|
||||
s := ""
|
||||
for k := range m {
|
||||
s = s + k
|
||||
}
|
||||
return s
|
||||
}(phoneticSymbol)
|
||||
|
||||
// 匹配带声调字符的正则表达式
|
||||
var rePhoneticSymbol = regexp.MustCompile("[" + rePhoneticSymbolSource + "]")
|
||||
|
||||
// reTone2 matches, at the end of the string, one of the letters
// a, e, o, i, u, v, n, m followed by a tone digit 1-4 (for example "a1").
// Group 1 is the letter and group 2 is the digit.
var reTone2 = regexp.MustCompile(`([aeoiuvnm])([1-4])$`)

// reTone3 matches a whole syllable in which a tone digit 1-4 sits between
// lower-case letters (for example "zho1ng"). Group 1 is the letters before the
// digit, group 2 the digit and group 3 the letters after it.
var reTone3 = regexp.MustCompile(`^([a-z]+)([1-4])([a-z]*)$`)
|
||||
|
||||
// Args holds the conversion options.
type Args struct {
	// Style is the pinyin style (default: Normal).
	Style int
	// Heteronym enables heteronym mode (default: disabled).
	Heteronym bool
	// Separator is the separator used by Slug (default: "-").
	Separator string

	// Fallback handles characters that have no pinyin. Returning a slice of
	// length 0 means the character is ignored, which is what the default
	// Fallback does.
	Fallback func(r rune, a Args) []string
}
|
||||
|
||||
// Style 默认配置:风格
|
||||
var Style = Normal
|
||||
|
||||
// Heteronym 默认配置:是否启用多音字模式
|
||||
var Heteronym = false
|
||||
|
||||
// Separator 默认配置: `Slug` 中 Join 所用的分隔符
|
||||
var Separator = "-"
|
||||
|
||||
// Fallback 默认配置: 如何处理没有拼音的字符(忽略这个字符)
|
||||
var Fallback = func(r rune, a Args) []string {
|
||||
return []string{}
|
||||
}
|
||||
|
||||
// finalExceptionsMap maps a tone-marked u to the ü carrying the same tone.
var finalExceptionsMap = map[string]string{
	"ū": "ǖ",
	"ú": "ǘ",
	"ǔ": "ǚ",
	"ù": "ǜ",
}

// reFinalExceptions matches a whole syllable consisting of j, q or x followed
// by a tone-marked u. Group 1 is the initial and group 2 the marked vowel.
var reFinalExceptions = regexp.MustCompile(`^(j|q|x)(ū|ú|ǔ|ù)$`)

// reFinal2Exceptions matches a whole syllable consisting of j, q or x, the
// letter u, and an optional single digit. Group 1 is the initial and group 2
// the digit (possibly empty).
var reFinal2Exceptions = regexp.MustCompile(`^(j|q|x)u(\d?)$`)
|
||||
|
||||
// NewArgs 返回包含默认配置的 `Args`
|
||||
func NewArgs() Args {
|
||||
return Args{Style, Heteronym, Separator, Fallback}
|
||||
}
|
||||
|
||||
// 获取单个拼音中的声母
|
||||
func initial(p string) string {
|
||||
s := ""
|
||||
for _, v := range initialArray {
|
||||
if strings.HasPrefix(p, v) {
|
||||
s = v
|
||||
break
|
||||
}
|
||||
}
|
||||
return s
|
||||
}
|
||||
|
||||
// 获取单个拼音中的韵母
|
||||
func final(p string) string {
|
||||
n := initial(p)
|
||||
if n == "" {
|
||||
return handleYW(p)
|
||||
}
|
||||
|
||||
// 特例 j/q/x
|
||||
matches := reFinalExceptions.FindStringSubmatch(p)
|
||||
// jū -> jǖ
|
||||
if len(matches) == 3 && matches[1] != "" && matches[2] != "" {
|
||||
v, _ := finalExceptionsMap[matches[2]]
|
||||
return v
|
||||
}
|
||||
// ju -> jv, ju1 -> jv1
|
||||
p = reFinal2Exceptions.ReplaceAllString(p, "${1}v$2")
|
||||
return strings.Join(strings.SplitN(p, n, 2), "")
|
||||
}
|
||||
|
||||
// handleYW rewrites a pinyin that starts with y or w into its final form.
// The first matching rule wins; a pinyin matching none is returned unchanged.
func handleYW(p string) string {
	switch {
	case strings.HasPrefix(p, "yu"):
		return "v" + p[2:] // yu -> v
	case strings.HasPrefix(p, "yi"):
		return p[1:] // yi -> i
	case strings.HasPrefix(p, "y"):
		return "i" + p[1:] // y -> i
	case strings.HasPrefix(p, "wu"):
		return p[1:] // wu -> u
	case strings.HasPrefix(p, "w"):
		return "u" + p[1:] // w -> u
	}
	return p
}
|
||||
|
||||
func toFixed(p string, a Args) string {
|
||||
if a.Style == Initials {
|
||||
return initial(p)
|
||||
}
|
||||
origP := p
|
||||
|
||||
// 替换拼音中的带声调字符
|
||||
py := rePhoneticSymbol.ReplaceAllStringFunc(p, func(m string) string {
|
||||
symbol, _ := phoneticSymbol[m]
|
||||
switch a.Style {
|
||||
// 不包含声调
|
||||
case Normal, FirstLetter, Finals:
|
||||
// 去掉声调: a1 -> a
|
||||
m = reTone2.ReplaceAllString(symbol, "$1")
|
||||
case Tone2, FinalsTone2, Tone3, FinalsTone3:
|
||||
// 返回使用数字标识声调的字符
|
||||
m = symbol
|
||||
default:
|
||||
// 声调在头上
|
||||
}
|
||||
return m
|
||||
})
|
||||
|
||||
switch a.Style {
|
||||
// 将声调移动到最后
|
||||
case Tone3, FinalsTone3:
|
||||
py = reTone3.ReplaceAllString(py, "$1$3$2")
|
||||
}
|
||||
switch a.Style {
|
||||
// 首字母
|
||||
case FirstLetter:
|
||||
py = string([]rune(py)[0])
|
||||
// 韵母
|
||||
case Finals, FinalsTone, FinalsTone2, FinalsTone3:
|
||||
// 转换为 []rune unicode 编码用于获取第一个拼音字符
|
||||
// 因为 string 是 utf-8 编码不方便获取第一个拼音字符
|
||||
rs := []rune(origP)
|
||||
switch string(rs[0]) {
|
||||
// 因为鼻音没有声母所以不需要去掉声母部分
|
||||
case "ḿ", "ń", "ň", "ǹ":
|
||||
default:
|
||||
py = final(py)
|
||||
}
|
||||
}
|
||||
return py
|
||||
}
|
||||
|
||||
func applyStyle(p []string, a Args) []string {
|
||||
newP := []string{}
|
||||
for _, v := range p {
|
||||
newP = append(newP, toFixed(v, a))
|
||||
}
|
||||
return newP
|
||||
}
|
||||
|
||||
// SinglePinyin 把单个 `rune` 类型的汉字转换为拼音.
|
||||
func SinglePinyin(r rune, a Args) []string {
|
||||
if a.Fallback == nil {
|
||||
a.Fallback = Fallback
|
||||
}
|
||||
value, ok := PinyinDict[int(r)]
|
||||
pys := []string{}
|
||||
if ok {
|
||||
pys = strings.Split(value, ",")
|
||||
} else {
|
||||
pys = a.Fallback(r, a)
|
||||
}
|
||||
if len(pys) > 0 {
|
||||
if !a.Heteronym {
|
||||
pys = []string{pys[0]}
|
||||
}
|
||||
return applyStyle(pys, a)
|
||||
}
|
||||
return pys
|
||||
}
|
||||
|
||||
// Pinyin 汉字转拼音,支持多音字模式.
|
||||
func Pinyin(s string, a Args) [][]string {
|
||||
pys := [][]string{}
|
||||
for _, r := range s {
|
||||
py := SinglePinyin(r, a)
|
||||
if len(py) > 0 {
|
||||
pys = append(pys, py)
|
||||
}
|
||||
}
|
||||
return pys
|
||||
}
|
||||
|
||||
// LazyPinyin 汉字转拼音,与 `Pinyin` 的区别是:
|
||||
// 返回值类型不同,并且不支持多音字模式,每个汉字只取第一个音.
|
||||
func LazyPinyin(s string, a Args) []string {
|
||||
a.Heteronym = false
|
||||
pys := []string{}
|
||||
for _, v := range Pinyin(s, a) {
|
||||
pys = append(pys, v[0])
|
||||
}
|
||||
return pys
|
||||
}
|
||||
|
||||
// Slug join `LazyPinyin` 的返回值.
|
||||
// 建议改用 https://github.com/mozillazg/go-slugify
|
||||
func Slug(s string, a Args) string {
|
||||
separator := a.Separator
|
||||
return strings.Join(LazyPinyin(s, a), separator)
|
||||
}
|
||||
|
||||
// Convert 跟 Pinyin 的唯一区别就是 a 参数可以是 nil
|
||||
func Convert(s string, a *Args) [][]string {
|
||||
if a == nil {
|
||||
args := NewArgs()
|
||||
a = &args
|
||||
}
|
||||
return Pinyin(s, *a)
|
||||
}
|
||||
|
||||
// LazyConvert 跟 LazyPinyin 的唯一区别就是 a 参数可以是 nil
|
||||
func LazyConvert(s string, a *Args) []string {
|
||||
if a == nil {
|
||||
args := NewArgs()
|
||||
a = &args
|
||||
}
|
||||
return LazyPinyin(s, *a)
|
||||
}
|
File diff suppressed because it is too large
Load Diff
|
@ -50,6 +50,7 @@ github.com/mailru/easyjson/jwriter
|
|||
github.com/montanaflynn/stats
|
||||
# github.com/mozillazg/go-pinyin v0.20.0
|
||||
## explicit; go 1.11
|
||||
github.com/mozillazg/go-pinyin
|
||||
# github.com/olivere/elastic/v7 v7.0.32
|
||||
## explicit; go 1.17
|
||||
github.com/olivere/elastic/v7
|
||||
|
|
Loading…
Reference in New Issue