358 lines
16 KiB
Python
358 lines
16 KiB
Python
import json
|
||
import os
|
||
from pathlib import Path
|
||
|
||
import numpy as np
|
||
|
||
try:
|
||
from vikingdb import VikingDB, IAM, EmbeddingClient
|
||
from vikingdb.vector import EmbeddingData, EmbeddingModelOpt, EmbeddingRequest
|
||
except ImportError as exc: # pragma: no cover
|
||
raise SystemExit(
|
||
"Missing dependency: vikingdb-python-sdk.\n"
|
||
"Install it with: py -3 -m pip install vikingdb-python-sdk"
|
||
) from exc
|
||
|
||
|
||
def zh(value: str) -> str:
    r"""Decode backslash escapes such as ``\u5feb`` embedded in *value*.

    The tag catalogue in this file keeps its Chinese text as ASCII ``\uXXXX``
    escapes inside raw strings; this helper turns them into real characters.

    The ``unicode_escape`` codec interprets its input bytes as Latin-1, so the
    text is encoded with ``latin-1`` rather than UTF-8. Characters outside
    Latin-1 are first rewritten as escapes by ``backslashreplace`` and are
    then restored by the decode step, which means input that already
    contains non-ASCII characters passes through intact instead of being
    turned into mojibake. Pure-ASCII input behaves exactly as before.

    Args:
        value: Text that may contain Python-style escape sequences.

    Returns:
        The text with every escape sequence replaced by its character.
    """
    return value.encode("latin-1", "backslashreplace").decode("unicode_escape")
|
||
|
||
|
||
def _tag(label: str, aliases: list[str], description: str) -> dict:
    """Build one tag definition, decoding escaped text in every field.

    Plain ASCII aliases contain no backslashes, so passing them through
    ``zh`` leaves them unchanged.
    """
    return {
        "label": zh(label),
        "aliases": [zh(alias) for alias in aliases],
        "description": zh(description),
    }


# Catalogue of build tags. Each row is (label, aliases, description); the
# Chinese text is kept as ASCII \uXXXX escapes in raw strings and decoded by
# ``zh`` when the module is loaded. Every resulting entry is a dict with the
# keys "label", "aliases" and "description", in that order.
BUILD_TAGS = [
    _tag(
        r"\u5feb\u5251",
        ["duelist", "swift blade", "swiftblade", r"\u5251\u5feb", r"\u5feb\u5203"],
        r"\u4ee5\u9ad8\u901f\u8f7b\u5175\u5668\u3001\u8fde\u7eed\u51fa\u624b\u548c\u8d34\u8eab\u538b\u8feb\u4e3a\u6838\u5fc3\u7684\u8fd1\u6218\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u8fde\u6bb5",
        ["combo", "chain", r"\u8fde\u51fb"],
        r"\u4f9d\u8d56\u8fde\u7eed\u547d\u4e2d\u4e0e\u591a\u6bb5\u8282\u594f\u538b\u5236\u7684\u8f93\u51fa\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u7a81\u8fdb",
        ["dash", "lunge", "mobility engage"],
        r"\u5f3a\u8c03\u5feb\u901f\u8d34\u8fd1\u76ee\u6807\u3001\u62a2\u5360\u8eab\u4f4d\u548c\u5148\u624b\u5207\u5165\u7684\u6218\u6597\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u8ffd\u51fb",
        ["chase", "follow-up", "finisher chase"],
        r"\u64c5\u957f\u5728\u5bf9\u624b\u5931\u8861\u6216\u88ab\u51fb\u9000\u540e\u7ee7\u7eed\u8ffd\u6253\u7684\u6218\u6597\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u5feb\u88ad",
        ["assassin", "rogue", "ambush", r"\u523a\u51fb"],
        r"\u5f3a\u8c03\u77ed\u65f6\u5207\u5165\u3001\u70b9\u6740\u5f31\u70b9\u548c\u8fc5\u901f\u8131\u79bb\u7684\u523a\u51fb\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u8fdc\u5c04",
        ["projectile", "ranged", "arrow", r"\u5c04\u51fb"],
        r"\u4ee5\u6295\u5c04\u7269\u3001\u4e2d\u8fdc\u8ddd\u79bb\u7275\u5236\u548c\u5b89\u5168\u8f93\u51fa\u4e3a\u6838\u5fc3\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u6e38\u51fb",
        ["scout", "skirmish", "harass", "fieldcraft"],
        r"\u5f3a\u8c03\u8fb9\u79fb\u52a8\u8fb9\u8f93\u51fa\u3001\u8bd5\u63a2\u62c9\u626f\u548c\u62e9\u673a\u518d\u5165\u573a\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u673a\u52a8",
        ["mobility", "nimble", "agile"],
        r"\u4ee3\u8868\u9ad8\u4f4d\u79fb\u3001\u9ad8\u8eab\u6cd5\u548c\u5feb\u901f\u6362\u4f4d\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u98ce\u884c",
        ["wind", "gust", "speed", r"\u75be\u884c"],
        r"\u5f3a\u8c03\u8f7b\u7075\u6b65\u6cd5\u3001\u79fb\u901f\u4f18\u52bf\u548c\u8fc5\u901f\u8c03\u4f4d\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u91cd\u51fb",
        ["heavy", "slam", "mighty", "crush"],
        r"\u5f3a\u8c03\u539a\u91cd\u6253\u51fb\u3001\u5355\u6b21\u9ad8\u538b\u8f93\u51fa\u548c\u6b63\u9762\u7838\u7a7f\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u7206\u53d1",
        ["burst", "nova", "sudden damage"],
        r"\u4ee3\u8868\u77ed\u7a97\u53e3\u5185\u8fc5\u901f\u62ac\u9ad8\u4f24\u5bb3\u5cf0\u503c\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u7834\u7532",
        ["breaker", "armor break", "shatter"],
        r"\u64c5\u957f\u6495\u5f00\u9632\u5fa1\u3001\u6253\u65ad\u5b88\u52bf\u548c\u9488\u5bf9\u786c\u76ee\u6807\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u538b\u5236",
        ["tempo", "pressure", "control offense"],
        r"\u901a\u8fc7\u6301\u7eed\u4e3b\u52a8\u8fdb\u653b\u4e0e\u8282\u594f\u5360\u4f18\u8feb\u4f7f\u5bf9\u624b\u5931\u8bef\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u538b\u8840",
        ["low hp", "berserk", "risk damage"],
        r"\u4ee5\u5192\u9669\u538b\u4f4e\u8840\u7ebf\u6362\u53d6\u66f4\u5f3a\u653b\u51fb\u6027\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u5b88\u5fa1",
        ["ward", "guard", "protector", "defense"],
        r"\u5f3a\u8c03\u51cf\u4f24\u3001\u7a33\u5b88\u548c\u9876\u4f4f\u6b63\u9762\u4f24\u5bb3\u7684\u9632\u5fa1\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u62a4\u4f53",
        ["barrier", "shielding", "spirit guard", "spirit"],
        r"\u504f\u5411\u62a4\u7f69\u3001\u62a4\u8eab\u6c14\u52b2\u548c\u72b6\u6001\u6297\u538b\u7684\u9632\u5fa1\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u91cd\u7532",
        ["tank", "heavy armor", "iron wall"],
        r"\u4ee3\u8868\u9ad8\u786c\u5ea6\u62a4\u7532\u3001\u6b63\u9762\u627f\u4f24\u4e0e\u7a33\u5b9a\u7ad9\u573a\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u53cd\u51fb",
        ["counter", "riposte", "retaliate"],
        r"\u901a\u8fc7\u683c\u6321\u3001\u7ad9\u6869\u4e0e\u540e\u624b\u60e9\u7f5a\u5f62\u6210\u6536\u76ca\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u9547\u90aa",
        ["banish", "holy ward", "warding seal"],
        r"\u64c5\u957f\u538b\u5236\u90aa\u795f\u3001\u5492\u715e\u548c\u5f02\u7c7b\u80fd\u91cf\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u6cd5\u4fee",
        ["caster", "mage", "arcane", "spell"],
        r"\u4ee5\u6cd5\u672f\u9a71\u52a8\u8f93\u51fa\u3001\u63a7\u5236\u548c\u8d44\u6e90\u8fd0\u8f6c\u7684\u6838\u5fc3\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u6cd5\u529b",
        ["mana", "magic", "essence", "spirit power"],
        r"\u56f4\u7ed5\u6cd5\u529b\u4e0a\u9650\u3001\u6cd5\u672f\u6d88\u8017\u4e0e\u6cd5\u80fd\u5faa\u73af\u6784\u7b51\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u96f7\u6cd5",
        ["lightning", "thunder", "storm"],
        r"\u4ee3\u8868\u9ad8\u538b\u96f7\u7cfb\u672f\u6cd5\u3001\u77ac\u65f6\u9707\u8361\u548c\u9ebb\u75f9\u538b\u5236\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u7b26\u9635",
        ["sigil", "formation", "seal", "rune"],
        r"\u901a\u8fc7\u7b26\u7bb4\u3001\u6cd5\u9635\u548c\u9884\u5e03\u7f6e\u6548\u679c\u6539\u53d8\u6218\u573a\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u63a7\u573a",
        ["control", "crowd control", "lockdown"],
        r"\u4ee5\u9650\u5236\u884c\u52a8\u3001\u5c01\u9501\u7a7a\u95f4\u548c\u538b\u7f29\u9009\u62e9\u4e3a\u6838\u5fc3\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u8fc7\u8f7d",
        ["overload", "surge", "power spike"],
        r"\u5728\u77ed\u65f6\u95f4\u5185\u63a8\u52a8\u9ad8\u6cd5\u8017\u4e0e\u9ad8\u5f3a\u5ea6\u91ca\u653e\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u56de\u590d",
        ["heal", "healing", "recovery", "restore"],
        r"\u5f3a\u8c03\u5373\u65f6\u6062\u590d\u4e0e\u6218\u540e\u7eed\u63a5\u80fd\u529b\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u62a4\u6301",
        ["support", "aid", "blessing"],
        r"\u901a\u8fc7\u589e\u76ca\u3001\u62ac\u7a33\u6001\u548c\u4fdd\u62a4\u961f\u53cb\u6765\u5efa\u7acb\u4f18\u52bf\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u7eed\u6218",
        ["sustain", "endurance", "long fight"],
        r"\u9762\u5411\u957f\u7ebf\u6218\u6597\u3001\u8d44\u6e90\u6301\u7eed\u4e0e\u5bb9\u9519\u63d0\u5347\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u547d\u7eb9",
        ["fate", "omen", "destiny"],
        r"\u56f4\u7ed5\u547d\u8fd0\u3001\u5370\u8bb0\u4e0e\u89e6\u53d1\u5f0f\u8fde\u9501\u6536\u76ca\u6784\u7b51\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u673a\u7f18",
        ["fortune", "luck", "opportunity"],
        r"\u4f9d\u8d56\u65f6\u673a\u3001\u8fd0\u52bf\u548c\u989d\u5916\u6536\u76ca\u89e6\u53d1\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u51b7\u5374",
        ["cooldown", "cdr", "recharge"],
        r"\u901a\u8fc7\u66f4\u5feb\u5468\u8f6c\u6280\u80fd\u4e0e\u9053\u5177\u6765\u6eda\u52a8\u4f18\u52bf\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u7edf\u5fa1",
        ["commander", "command", "leader"],
        r"\u5f3a\u8c03\u6574\u4f53\u534f\u8c03\u3001\u56e2\u961f\u6536\u76ca\u548c\u7efc\u5408\u8c03\u5ea6\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u5747\u8861",
        ["balanced", "adaptable", "all-round"],
        r"\u6ca1\u6709\u660e\u663e\u77ed\u677f\uff0c\u504f\u91cd\u4e2d\u540e\u671f\u7a33\u5b9a\u6210\u578b\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u5de5\u5de7",
        ["craft", "artisan", "utility", "socket"],
        r"\u504f\u5411\u5de5\u827a\u3001\u5668\u68b0\u3001\u9576\u5d4c\u548c\u8f85\u52a9\u6784\u7b51\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u70bc\u836f",
        ["alchemy", "potion", "tonic"],
        r"\u56f4\u7ed5\u836f\u5242\u3001\u4e34\u65f6\u5f3a\u5316\u548c\u6218\u4e2d\u8865\u7ed9\u7684\u5de5\u827a\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u5148\u950b",
        ["vanguard", "frontline"],
        r"\u4ee3\u8868\u961f\u4f0d\u4e2d\u7684\u6b63\u9762\u5f00\u8def\u3001\u5403\u7ebf\u4e0e\u538b\u524d\u6392\u804c\u8d23\u3002",
    ),
    _tag(
        r"\u72c2\u6218",
        ["berserker", "rage"],
        r"\u4ee5\u8840\u91cf\u4ea4\u6362\u3001\u731b\u653b\u548c\u9ad8\u98ce\u9669\u9ad8\u56de\u62a5\u4e3a\u7279\u8272\u7684\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u6cd5\u5251",
        ["spellblade", "bladecaster"],
        r"\u878d\u5408\u5175\u5203\u4e0e\u672f\u6cd5\uff0c\u64c5\u957f\u4e2d\u8ddd\u79bb\u538b\u8feb\u7684\u6df7\u5408\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u5723\u4f51",
        ["paladin", "holy guard"],
        r"\u517c\u5177\u9632\u62a4\u3001\u56de\u590d\u548c\u60e9\u6212\u80fd\u529b\u7684\u795d\u798f\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u5821\u5792",
        ["fortress", "bulwark"],
        r"\u4ee5\u7a33\u5b9a\u7ad9\u573a\u3001\u786c\u6297\u4e0e\u53cd\u6253\u4e3a\u6838\u5fc3\u7684\u91cd\u9632\u5fa1\u6807\u7b7e\u3002",
    ),
    _tag(
        r"\u8d77\u624b",
        ["starter", "legacy"],
        r"\u504f\u8fc7\u6e21\u4e0e\u8d77\u6b65\u7528\u9014\u7684\u65e9\u671f\u6784\u7b51\u6807\u7b7e\u3002",
    ),
]
|
||
|
||
|
||
def build_prompt(definition: dict) -> str:
    """Render one tag definition as the single line of text that gets embedded.

    Args:
        definition: Mapping with the keys ``"label"``, ``"description"`` and
            ``"aliases"`` (an iterable of strings).

    Returns:
        The label, a colon, the description, then the aliases joined with
        the ideographic comma (U+3001) and closed by an ideographic full stop.

    Raises:
        KeyError: If any of the three keys is missing.
    """
    alias_text = "\u3001".join(definition["aliases"])
    label = definition["label"]
    description = definition["description"]
    return f"{label}:{description} 别名:{alias_text}。"
|
||
|
||
|
||
def load_env_file(path: Path, protected_keys: set[str]) -> None:
    """Copy ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    A missing file is silently ignored. Blank lines, lines starting with
    ``#`` and lines without ``=`` are skipped. Whitespace around the key and
    the value is trimmed, and one pair of matching single or double quotes
    around the value is removed.

    Args:
        path: Location of the file to read (decoded as UTF-8).
        protected_keys: Variable names that must never be assigned here.
    """
    if not path.exists():
        return

    for entry in path.read_text(encoding="utf-8").splitlines():
        entry = entry.strip()
        if entry == "" or entry.startswith("#") or "=" not in entry:
            continue

        # Only the first "=" separates the name from the value.
        name, _, raw_value = entry.partition("=")
        name = name.strip()
        if name == "" or name in protected_keys:
            continue

        text = raw_value.strip()
        quoted = len(text) >= 2 and text[0] in ("'", '"') and text[-1] == text[0]
        os.environ[name] = text[1:-1] if quoted else text
|
||
|
||
|
||
def load_local_env() -> None:
    """Load ``.env`` and then ``.env.local`` from the directory above this script's folder.

    The set of variable names already present in the process environment is
    captured once, before either file is read, and handed to both loads as
    the protected set. Pre-existing variables are therefore never replaced,
    while a value read from ``.env`` can still be overridden by
    ``.env.local``.
    """
    preexisting = set(os.environ)
    project_root = Path(__file__).resolve().parents[1]

    for filename in (".env", ".env.local"):
        load_env_file(project_root / filename, preexisting)
|
||
|
||
|
||
def create_embedding_client() -> EmbeddingClient:
    """Build an ``EmbeddingClient`` from credentials found in the environment.

    The access key is read from ``VOLCENGINE_ACCESS_KEY_ID`` and the secret
    from ``VOLCENGINE_SECRET_ACCESS_KEY``; each falls back to its
    ``VIKINGDB_``-prefixed counterpart when unset or empty. Host and region
    come from ``VIKINGDB_HOST`` / ``VIKINGDB_REGION`` with built-in defaults.

    Returns:
        An ``EmbeddingClient`` wrapping a ``VikingDB`` service object.

    Raises:
        SystemExit: If either the access key or the secret key is missing.
    """
    access_key = os.getenv("VOLCENGINE_ACCESS_KEY_ID") or os.getenv("VIKINGDB_ACCESS_KEY_ID")
    secret_key = os.getenv("VOLCENGINE_SECRET_ACCESS_KEY") or os.getenv("VIKINGDB_SECRET_ACCESS_KEY")
    host = os.getenv("VIKINGDB_HOST", "api-vikingdb.vikingdb.cn-beijing.volces.com")
    region = os.getenv("VIKINGDB_REGION", "cn-beijing")

    if not (access_key and secret_key):
        raise SystemExit(
            "Missing VikingDB credentials.\n"
            "Required:\n"
            " VOLCENGINE_ACCESS_KEY_ID\n"
            " VOLCENGINE_SECRET_ACCESS_KEY\n"
            "Optional:\n"
            " VIKINGDB_HOST (default: api-vikingdb.vikingdb.cn-beijing.volces.com)\n"
            " VIKINGDB_REGION (default: cn-beijing)\n"
        )

    credentials = IAM(ak=access_key, sk=secret_key)
    return EmbeddingClient(VikingDB(host=host, region=region, auth=credentials))
|
||
|
||
|
||
def _read_field(container, name: str):
    """Return ``container.<name>``, or ``container[name]`` for dicts, else ``None``."""
    value = getattr(container, name, None)
    if value is None and isinstance(container, dict):
        value = container.get(name)
    return value


def encode_texts(client: EmbeddingClient, texts: list[str]) -> np.ndarray:
    """Embed *texts* with the ``bge-large-zh`` dense model and L2-normalise the rows.

    The response is probed defensively because its shape is not fixed here:
    the entries are looked up under ``response.result.data`` (attribute or
    dict key) and then ``response.data``; each entry's vector is looked up
    under ``dense`` and then ``embedding``.

    Args:
        client: Embedding client used to issue the request.
        texts: Strings to embed, one output row per string.

    Returns:
        A ``float32`` matrix with one unit-length row per input text. Rows
        that come back as all zeros are left as zeros. An empty *texts*
        yields an empty ``(0, 0)`` matrix without calling the service.

    Raises:
        ValueError: If the response has no data entries, an entry has no
            vector, or the number of vectors differs from ``len(texts)``.
    """
    texts = list(texts)
    if not texts:
        # np.linalg.norm(..., axis=1) rejects the 1-D array an empty batch
        # would produce, so answer directly instead of crashing later.
        return np.zeros((0, 0), dtype=np.float32)

    request = EmbeddingRequest(
        data=[EmbeddingData(text=text) for text in texts],
        dense_model=EmbeddingModelOpt(name="bge-large-zh"),
    )
    response = client.embedding(request)

    result = getattr(response, "result", None)
    data = _read_field(result, "data") if result is not None else None
    if data is None:
        data = getattr(response, "data", None)
    if data is None:
        raise ValueError("Embedding response did not include any data entries.")

    embeddings: list[list[float]] = []
    for item in data:
        dense = _read_field(item, "dense")
        if dense is None:
            dense = _read_field(item, "embedding")
        if dense is None:
            raise ValueError("Embedding response item did not include a dense vector.")
        embeddings.append(dense)

    # Callers index rows by the position of the input text, so a short or
    # long response must fail loudly rather than misalign the results.
    if len(embeddings) != len(texts):
        raise ValueError(
            f"Embedding response returned {len(embeddings)} vectors for {len(texts)} texts."
        )

    matrix = np.array(embeddings, dtype=np.float32)
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    norms[norms == 0] = 1.0  # avoid dividing an all-zero row by zero
    return matrix / norms
|
||
|
||
|
||
def main():
    """Embed every build tag, keep the similar pairs, and emit them as TypeScript.

    Steps: load local env files, create the embedding client, embed one
    prompt per entry of ``BUILD_TAGS``, compute the dot product of every
    unordered pair of (already normalised) vectors, keep pairs scoring at
    least 0.35, write them to ``src/data/buildTagSimilarity.generated.ts``
    under the parent of this script's directory, and print a JSON summary.
    """
    load_local_env()
    client = create_embedding_client()
    vectors = encode_texts(client, [build_prompt(tag) for tag in BUILD_TAGS])

    threshold = 0.35
    pairs: list[tuple[str, str, float]] = []
    for i, left in enumerate(BUILD_TAGS):
        # Start after i so each unordered pair is scored exactly once.
        for j, right in enumerate(BUILD_TAGS[i + 1:], start=i + 1):
            score = float(np.dot(vectors[i], vectors[j]))
            if score < threshold:
                continue
            pairs.append((left["label"], right["label"], round(score, 4)))

    target = Path(__file__).resolve().parents[1] / "src" / "data" / "buildTagSimilarity.generated.ts"
    # NOTE(review): `as const` produces a readonly array, which strict
    # TypeScript may refuse to assign to the mutable `Array<...>` annotation
    # -- confirm the generated file type-checks in the consuming project.
    header = "export const BUILD_TAG_SIMILARITY_PAIRS: Array<readonly [string, string, number]> = ["
    rows = [f" ['{first}', '{second}', {score}]," for first, second, score in pairs]
    target.write_text("\n".join([header, *rows, "] as const;"]) + "\n", encoding="utf-8")

    summary = {
        "output": str(target),
        "pair_count": len(pairs),
        "model": "bge-large-zh",
    }
    print(json.dumps(summary, ensure_ascii=False))
|
||
|
||
|
||
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|