Files
Genarrative/scripts/generate-build-tag-similarity.py
高物 c49c64896a
Some checks failed
CI / verify (push) Has been cancelled
初始仓库迁移
2026-04-04 23:57:06 +08:00

358 lines
16 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import os
from pathlib import Path
import numpy as np
try:
from vikingdb import VikingDB, IAM, EmbeddingClient
from vikingdb.vector import EmbeddingData, EmbeddingModelOpt, EmbeddingRequest
except ImportError as exc: # pragma: no cover
raise SystemExit(
"Missing dependency: vikingdb-python-sdk.\n"
"Install it with: py -3 -m pip install vikingdb-python-sdk"
) from exc
def zh(value: str) -> str:
    r"""Decode backslash escapes such as ``\u5feb`` in *value* into real characters.

    The tag table in this file stores its Chinese text as ``\uXXXX`` escapes
    inside raw strings so the source stays pure ASCII; this helper turns such
    a string into the text it spells out.

    Args:
        value: Text that may contain Python-style backslash escapes. Literal
            non-ASCII characters are allowed and are returned unchanged.

    Returns:
        The decoded string.
    """
    # "unicode_escape" interprets raw bytes as Latin-1.  Encoding with UTF-8
    # would therefore turn any literal non-ASCII character into mojibake.
    # Encoding as Latin-1 with "backslashreplace" keeps code points up to
    # U+00FF as the single byte that decodes back to the same character and
    # rewrites everything above that as an ASCII escape, so both escaped and
    # literal text survive the round trip.  Pure-ASCII input is unaffected.
    return value.encode("latin-1", "backslashreplace").decode("unicode_escape")
# Catalogue of build tags whose pairwise similarity is computed by this script.
# Each entry is a dict with three keys:
#   "label"       - the tag's name (Chinese),
#   "aliases"     - alternative names: mostly English keywords, plus a few
#                   Chinese synonyms,
#   "description" - a one-sentence Chinese description of the tag.
# Chinese text is written as \uXXXX escapes inside raw strings and converted
# to real characters by zh() when the module is imported.
BUILD_TAGS = [
{
"label": zh(r"\u5feb\u5251"),
"aliases": ["duelist", "swift blade", "swiftblade", zh(r"\u5251\u5feb"), zh(r"\u5feb\u5203")],
"description": zh(r"\u4ee5\u9ad8\u901f\u8f7b\u5175\u5668\u3001\u8fde\u7eed\u51fa\u624b\u548c\u8d34\u8eab\u538b\u8feb\u4e3a\u6838\u5fc3\u7684\u8fd1\u6218\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u8fde\u6bb5"),
"aliases": ["combo", "chain", zh(r"\u8fde\u51fb")],
"description": zh(r"\u4f9d\u8d56\u8fde\u7eed\u547d\u4e2d\u4e0e\u591a\u6bb5\u8282\u594f\u538b\u5236\u7684\u8f93\u51fa\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u7a81\u8fdb"),
"aliases": ["dash", "lunge", "mobility engage"],
"description": zh(r"\u5f3a\u8c03\u5feb\u901f\u8d34\u8fd1\u76ee\u6807\u3001\u62a2\u5360\u8eab\u4f4d\u548c\u5148\u624b\u5207\u5165\u7684\u6218\u6597\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u8ffd\u51fb"),
"aliases": ["chase", "follow-up", "finisher chase"],
"description": zh(r"\u64c5\u957f\u5728\u5bf9\u624b\u5931\u8861\u6216\u88ab\u51fb\u9000\u540e\u7ee7\u7eed\u8ffd\u6253\u7684\u6218\u6597\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u5feb\u88ad"),
"aliases": ["assassin", "rogue", "ambush", zh(r"\u523a\u51fb")],
"description": zh(r"\u5f3a\u8c03\u77ed\u65f6\u5207\u5165\u3001\u70b9\u6740\u5f31\u70b9\u548c\u8fc5\u901f\u8131\u79bb\u7684\u523a\u51fb\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u8fdc\u5c04"),
"aliases": ["projectile", "ranged", "arrow", zh(r"\u5c04\u51fb")],
"description": zh(r"\u4ee5\u6295\u5c04\u7269\u3001\u4e2d\u8fdc\u8ddd\u79bb\u7275\u5236\u548c\u5b89\u5168\u8f93\u51fa\u4e3a\u6838\u5fc3\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u6e38\u51fb"),
"aliases": ["scout", "skirmish", "harass", "fieldcraft"],
"description": zh(r"\u5f3a\u8c03\u8fb9\u79fb\u52a8\u8fb9\u8f93\u51fa\u3001\u8bd5\u63a2\u62c9\u626f\u548c\u62e9\u673a\u518d\u5165\u573a\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u673a\u52a8"),
"aliases": ["mobility", "nimble", "agile"],
"description": zh(r"\u4ee3\u8868\u9ad8\u4f4d\u79fb\u3001\u9ad8\u8eab\u6cd5\u548c\u5feb\u901f\u6362\u4f4d\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u98ce\u884c"),
"aliases": ["wind", "gust", "speed", zh(r"\u75be\u884c")],
"description": zh(r"\u5f3a\u8c03\u8f7b\u7075\u6b65\u6cd5\u3001\u79fb\u901f\u4f18\u52bf\u548c\u8fc5\u901f\u8c03\u4f4d\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u91cd\u51fb"),
"aliases": ["heavy", "slam", "mighty", "crush"],
"description": zh(r"\u5f3a\u8c03\u539a\u91cd\u6253\u51fb\u3001\u5355\u6b21\u9ad8\u538b\u8f93\u51fa\u548c\u6b63\u9762\u7838\u7a7f\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u7206\u53d1"),
"aliases": ["burst", "nova", "sudden damage"],
"description": zh(r"\u4ee3\u8868\u77ed\u7a97\u53e3\u5185\u8fc5\u901f\u62ac\u9ad8\u4f24\u5bb3\u5cf0\u503c\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u7834\u7532"),
"aliases": ["breaker", "armor break", "shatter"],
"description": zh(r"\u64c5\u957f\u6495\u5f00\u9632\u5fa1\u3001\u6253\u65ad\u5b88\u52bf\u548c\u9488\u5bf9\u786c\u76ee\u6807\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u538b\u5236"),
"aliases": ["tempo", "pressure", "control offense"],
"description": zh(r"\u901a\u8fc7\u6301\u7eed\u4e3b\u52a8\u8fdb\u653b\u4e0e\u8282\u594f\u5360\u4f18\u8feb\u4f7f\u5bf9\u624b\u5931\u8bef\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u538b\u8840"),
"aliases": ["low hp", "berserk", "risk damage"],
"description": zh(r"\u4ee5\u5192\u9669\u538b\u4f4e\u8840\u7ebf\u6362\u53d6\u66f4\u5f3a\u653b\u51fb\u6027\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u5b88\u5fa1"),
"aliases": ["ward", "guard", "protector", "defense"],
"description": zh(r"\u5f3a\u8c03\u51cf\u4f24\u3001\u7a33\u5b88\u548c\u9876\u4f4f\u6b63\u9762\u4f24\u5bb3\u7684\u9632\u5fa1\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u62a4\u4f53"),
"aliases": ["barrier", "shielding", "spirit guard", "spirit"],
"description": zh(r"\u504f\u5411\u62a4\u7f69\u3001\u62a4\u8eab\u6c14\u52b2\u548c\u72b6\u6001\u6297\u538b\u7684\u9632\u5fa1\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u91cd\u7532"),
"aliases": ["tank", "heavy armor", "iron wall"],
"description": zh(r"\u4ee3\u8868\u9ad8\u786c\u5ea6\u62a4\u7532\u3001\u6b63\u9762\u627f\u4f24\u4e0e\u7a33\u5b9a\u7ad9\u573a\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u53cd\u51fb"),
"aliases": ["counter", "riposte", "retaliate"],
"description": zh(r"\u901a\u8fc7\u683c\u6321\u3001\u7ad9\u6869\u4e0e\u540e\u624b\u60e9\u7f5a\u5f62\u6210\u6536\u76ca\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u9547\u90aa"),
"aliases": ["banish", "holy ward", "warding seal"],
"description": zh(r"\u64c5\u957f\u538b\u5236\u90aa\u795f\u3001\u5492\u715e\u548c\u5f02\u7c7b\u80fd\u91cf\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u6cd5\u4fee"),
"aliases": ["caster", "mage", "arcane", "spell"],
"description": zh(r"\u4ee5\u6cd5\u672f\u9a71\u52a8\u8f93\u51fa\u3001\u63a7\u5236\u548c\u8d44\u6e90\u8fd0\u8f6c\u7684\u6838\u5fc3\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u6cd5\u529b"),
"aliases": ["mana", "magic", "essence", "spirit power"],
"description": zh(r"\u56f4\u7ed5\u6cd5\u529b\u4e0a\u9650\u3001\u6cd5\u672f\u6d88\u8017\u4e0e\u6cd5\u80fd\u5faa\u73af\u6784\u7b51\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u96f7\u6cd5"),
"aliases": ["lightning", "thunder", "storm"],
"description": zh(r"\u4ee3\u8868\u9ad8\u538b\u96f7\u7cfb\u672f\u6cd5\u3001\u77ac\u65f6\u9707\u8361\u548c\u9ebb\u75f9\u538b\u5236\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u7b26\u9635"),
"aliases": ["sigil", "formation", "seal", "rune"],
"description": zh(r"\u901a\u8fc7\u7b26\u7bb4\u3001\u6cd5\u9635\u548c\u9884\u5e03\u7f6e\u6548\u679c\u6539\u53d8\u6218\u573a\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u63a7\u573a"),
"aliases": ["control", "crowd control", "lockdown"],
"description": zh(r"\u4ee5\u9650\u5236\u884c\u52a8\u3001\u5c01\u9501\u7a7a\u95f4\u548c\u538b\u7f29\u9009\u62e9\u4e3a\u6838\u5fc3\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u8fc7\u8f7d"),
"aliases": ["overload", "surge", "power spike"],
"description": zh(r"\u5728\u77ed\u65f6\u95f4\u5185\u63a8\u52a8\u9ad8\u6cd5\u8017\u4e0e\u9ad8\u5f3a\u5ea6\u91ca\u653e\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u56de\u590d"),
"aliases": ["heal", "healing", "recovery", "restore"],
"description": zh(r"\u5f3a\u8c03\u5373\u65f6\u6062\u590d\u4e0e\u6218\u540e\u7eed\u63a5\u80fd\u529b\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u62a4\u6301"),
"aliases": ["support", "aid", "blessing"],
"description": zh(r"\u901a\u8fc7\u589e\u76ca\u3001\u62ac\u7a33\u6001\u548c\u4fdd\u62a4\u961f\u53cb\u6765\u5efa\u7acb\u4f18\u52bf\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u7eed\u6218"),
"aliases": ["sustain", "endurance", "long fight"],
"description": zh(r"\u9762\u5411\u957f\u7ebf\u6218\u6597\u3001\u8d44\u6e90\u6301\u7eed\u4e0e\u5bb9\u9519\u63d0\u5347\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u547d\u7eb9"),
"aliases": ["fate", "omen", "destiny"],
"description": zh(r"\u56f4\u7ed5\u547d\u8fd0\u3001\u5370\u8bb0\u4e0e\u89e6\u53d1\u5f0f\u8fde\u9501\u6536\u76ca\u6784\u7b51\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u673a\u7f18"),
"aliases": ["fortune", "luck", "opportunity"],
"description": zh(r"\u4f9d\u8d56\u65f6\u673a\u3001\u8fd0\u52bf\u548c\u989d\u5916\u6536\u76ca\u89e6\u53d1\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u51b7\u5374"),
"aliases": ["cooldown", "cdr", "recharge"],
"description": zh(r"\u901a\u8fc7\u66f4\u5feb\u5468\u8f6c\u6280\u80fd\u4e0e\u9053\u5177\u6765\u6eda\u52a8\u4f18\u52bf\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u7edf\u5fa1"),
"aliases": ["commander", "command", "leader"],
"description": zh(r"\u5f3a\u8c03\u6574\u4f53\u534f\u8c03\u3001\u56e2\u961f\u6536\u76ca\u548c\u7efc\u5408\u8c03\u5ea6\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u5747\u8861"),
"aliases": ["balanced", "adaptable", "all-round"],
"description": zh(r"\u6ca1\u6709\u660e\u663e\u77ed\u677f\uff0c\u504f\u91cd\u4e2d\u540e\u671f\u7a33\u5b9a\u6210\u578b\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u5de5\u5de7"),
"aliases": ["craft", "artisan", "utility", "socket"],
"description": zh(r"\u504f\u5411\u5de5\u827a\u3001\u5668\u68b0\u3001\u9576\u5d4c\u548c\u8f85\u52a9\u6784\u7b51\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u70bc\u836f"),
"aliases": ["alchemy", "potion", "tonic"],
"description": zh(r"\u56f4\u7ed5\u836f\u5242\u3001\u4e34\u65f6\u5f3a\u5316\u548c\u6218\u4e2d\u8865\u7ed9\u7684\u5de5\u827a\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u5148\u950b"),
"aliases": ["vanguard", "frontline"],
"description": zh(r"\u4ee3\u8868\u961f\u4f0d\u4e2d\u7684\u6b63\u9762\u5f00\u8def\u3001\u5403\u7ebf\u4e0e\u538b\u524d\u6392\u804c\u8d23\u3002"),
},
{
"label": zh(r"\u72c2\u6218"),
"aliases": ["berserker", "rage"],
"description": zh(r"\u4ee5\u8840\u91cf\u4ea4\u6362\u3001\u731b\u653b\u548c\u9ad8\u98ce\u9669\u9ad8\u56de\u62a5\u4e3a\u7279\u8272\u7684\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u6cd5\u5251"),
"aliases": ["spellblade", "bladecaster"],
"description": zh(r"\u878d\u5408\u5175\u5203\u4e0e\u672f\u6cd5\uff0c\u64c5\u957f\u4e2d\u8ddd\u79bb\u538b\u8feb\u7684\u6df7\u5408\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u5723\u4f51"),
"aliases": ["paladin", "holy guard"],
"description": zh(r"\u517c\u5177\u9632\u62a4\u3001\u56de\u590d\u548c\u60e9\u6212\u80fd\u529b\u7684\u795d\u798f\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u5821\u5792"),
"aliases": ["fortress", "bulwark"],
"description": zh(r"\u4ee5\u7a33\u5b9a\u7ad9\u573a\u3001\u786c\u6297\u4e0e\u53cd\u6253\u4e3a\u6838\u5fc3\u7684\u91cd\u9632\u5fa1\u6807\u7b7e\u3002"),
},
{
"label": zh(r"\u8d77\u624b"),
"aliases": ["starter", "legacy"],
"description": zh(r"\u504f\u8fc7\u6e21\u4e0e\u8d77\u6b65\u7528\u9014\u7684\u65e9\u671f\u6784\u7b51\u6807\u7b7e\u3002"),
},
]
def build_prompt(definition: dict) -> str:
    """Render one tag definition as the text that gets embedded.

    The result is the label immediately followed by the description, then a
    space, the literal alias marker, and the aliases joined with the
    ideographic comma (U+3001).

    Args:
        definition: Mapping with "label", "description" and "aliases" keys;
            "aliases" must be an iterable of strings.

    Returns:
        The prompt string for the embedding model.
    """
    alias_text = "\u3001".join(definition["aliases"])
    label = definition["label"]
    description = definition["description"]
    return f"{label}{description} 别名:{alias_text}"
def load_env_file(path: Path, protected_keys: set[str]) -> None:
    """Copy ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Keys and values are stripped of surrounding whitespace, a
    shell-style ``export`` prefix on the key is dropped, and one pair of
    matching single or double quotes around the value is removed. Later
    assignments to the same key overwrite earlier ones.

    Args:
        path: Location of the env file. A missing path, or a path that is not
            a regular file, is silently ignored.
        protected_keys: Variable names that must never be overwritten.

    Returns:
        None. The function only mutates ``os.environ``.
    """
    # is_file() instead of exists(): a directory at this path is treated like
    # a missing file rather than crashing inside read_text().
    if not path.is_file():
        return

    # "utf-8-sig" drops a leading byte-order mark (common with Windows
    # editors) that would otherwise become part of the first key; files
    # without a BOM decode exactly as with plain UTF-8.
    for raw_line in path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        key, separator, value = line.partition("=")
        if not separator:
            continue

        key = key.strip()
        # Accept "export KEY=value" so files that are also sourced by a shell
        # do not define a useless variable literally named "export KEY".
        if key.startswith(("export ", "export\t")):
            key = key[len("export"):].strip()
        if not key or key in protected_keys:
            continue

        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in {"'", '"'}:
            value = value[1:-1]
        os.environ[key] = value
def load_local_env() -> None:
    """Load ``.env`` and then ``.env.local`` from the directory above this script's folder.

    Variables already present in the process environment when this function
    starts are passed as protected keys to both loads, so neither file
    replaces them. Because that snapshot is taken before either file is read,
    values from ``.env.local`` still override values that came from ``.env``.
    """
    project_root = Path(__file__).resolve().parents[1]
    preexisting = set(os.environ)
    for filename in (".env", ".env.local"):
        load_env_file(project_root / filename, preexisting)
def create_embedding_client() -> EmbeddingClient:
    """Build a VikingDB embedding client from environment variables.

    Credentials are read from ``VOLCENGINE_ACCESS_KEY_ID`` /
    ``VOLCENGINE_SECRET_ACCESS_KEY``, falling back to
    ``VIKINGDB_ACCESS_KEY_ID`` / ``VIKINGDB_SECRET_ACCESS_KEY`` when the
    former are unset or empty. ``VIKINGDB_HOST`` and ``VIKINGDB_REGION`` are
    optional and default to the cn-beijing endpoint and region.

    Returns:
        An ``EmbeddingClient`` wrapping the configured ``VikingDB`` service.

    Raises:
        SystemExit: If either the access key or the secret key is missing.
    """
    env = os.environ
    access_key = env.get("VOLCENGINE_ACCESS_KEY_ID") or env.get("VIKINGDB_ACCESS_KEY_ID")
    secret_key = env.get("VOLCENGINE_SECRET_ACCESS_KEY") or env.get("VIKINGDB_SECRET_ACCESS_KEY")

    # Fail early with a usage hint before touching the SDK.
    if not (access_key and secret_key):
        raise SystemExit(
            "Missing VikingDB credentials.\n"
            "Required:\n"
            " VOLCENGINE_ACCESS_KEY_ID\n"
            " VOLCENGINE_SECRET_ACCESS_KEY\n"
            "Optional:\n"
            " VIKINGDB_HOST (default: api-vikingdb.vikingdb.cn-beijing.volces.com)\n"
            " VIKINGDB_REGION (default: cn-beijing)\n"
        )

    host = env.get("VIKINGDB_HOST", "api-vikingdb.vikingdb.cn-beijing.volces.com")
    region = env.get("VIKINGDB_REGION", "cn-beijing")
    service = VikingDB(host=host, region=region, auth=IAM(ak=access_key, sk=secret_key))
    return EmbeddingClient(service)
def _read_field(container, name: str):
    """Return ``container.<name>``, or ``container[name]`` for dicts; ``None`` if absent."""
    value = getattr(container, name, None)
    if value is None and isinstance(container, dict):
        value = container.get(name)
    return value


def encode_texts(client: EmbeddingClient, texts: list[str]) -> np.ndarray:
    """Embed *texts* with the ``bge-large-zh`` dense model and L2-normalise the rows.

    The response is read defensively: the entry list is looked up as
    ``response.result.data`` (attribute or dict key) and then as
    ``response.data``; each entry's vector is looked up as ``dense`` and then
    as ``embedding`` (attribute or dict key).

    Args:
        client: Embedding client used to issue the request.
        texts: Texts to embed, one vector is expected per text.

    Returns:
        A ``float32`` array with one unit-length row per input text (all-zero
        vectors are left as zeros). For empty *texts* an array of shape
        ``(0, 0)`` is returned without contacting the service.

    Raises:
        ValueError: If the response has no data entries, an entry has no
            vector, or the number of vectors differs from ``len(texts)``.
    """
    # Nothing to embed: skip the request. Without this guard np.array([])
    # would be one-dimensional and norm(axis=1) below would fail.
    if not texts:
        return np.empty((0, 0), dtype=np.float32)

    request = EmbeddingRequest(
        data=[EmbeddingData(text=text) for text in texts],
        dense_model=EmbeddingModelOpt(name="bge-large-zh"),
    )
    response = client.embedding(request)

    data = _read_field(getattr(response, "result", None), "data")
    if data is None:
        data = getattr(response, "data", None)
    if data is None:
        raise ValueError("Embedding response did not include any data entries.")

    embeddings: list[list[float]] = []
    for item in data:
        dense = _read_field(item, "dense")
        if dense is None:
            dense = _read_field(item, "embedding")
        if dense is None:
            raise ValueError("Embedding response item did not include a dense vector.")
        embeddings.append(dense)

    # Callers index rows by input position, so a short or long response must
    # not be passed on silently.
    if len(embeddings) != len(texts):
        raise ValueError(
            f"Embedding response returned {len(embeddings)} vectors for {len(texts)} input texts."
        )

    matrix = np.array(embeddings, dtype=np.float32)
    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    # Avoid division by zero for all-zero vectors; they stay all-zero.
    norms[norms == 0] = 1.0
    return matrix / norms
def main() -> None:
    """Generate ``src/data/buildTagSimilarity.generated.ts`` from tag embeddings.

    Steps:
      1. Load ``.env`` / ``.env.local`` and create the embedding client.
      2. Embed one prompt per ``BUILD_TAGS`` entry (rows are unit vectors, so
         a dot product is the cosine similarity).
      3. Keep every unordered tag pair whose similarity is not below 0.35,
         rounded to four decimals.
      4. Write the pairs as a TypeScript module and print a JSON summary
         (output path, pair count, model name) to stdout.
    """
    from itertools import combinations

    load_local_env()
    client = create_embedding_client()
    prompts = [build_prompt(definition) for definition in BUILD_TAGS]
    embeddings = encode_texts(client, prompts)

    threshold = 0.35
    pairs: list[tuple[str, str, float]] = []
    # combinations() yields (i, j) with i < j in the same order as the nested
    # index loops it replaces.
    for (index, left), (other_index, right) in combinations(enumerate(BUILD_TAGS), 2):
        similarity = float(np.dot(embeddings[index], embeddings[other_index]))
        if similarity < threshold:
            continue
        pairs.append((left["label"], right["label"], round(similarity, 4)))

    output_path = Path(__file__).resolve().parents[1] / "src" / "data" / "buildTagSimilarity.generated.ts"
    lines = [
        "export const BUILD_TAG_SIMILARITY_PAIRS: Array<readonly [string, string, number]> = ["
    ]
    lines.extend(f" ['{left}', '{right}', {similarity}]," for left, right, similarity in pairs)
    # No trailing "as const": a const assertion produces a readonly tuple type,
    # which TypeScript refuses to assign to the mutable Array<...> annotation
    # above (error TS4104). The annotation alone already fixes the element type.
    lines.append("];")

    # Make sure the target directory exists on a fresh checkout.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text("\n".join(lines) + "\n", encoding="utf-8")

    print(json.dumps({
        "output": str(output_path),
        "pair_count": len(pairs),
        "model": "bge-large-zh",
    }, ensure_ascii=False))
if __name__ == "__main__":
main()