不具合の内容
――は のような文章を読み上げようとすると “500 Internal Server Error” が発生します。
一方 ――あ の場合はエラーとなりません。
現象・ログ
$ poetry run task serve
# 「――あ」
[2024/12/11 00:36:31] INFO: 127.0.0.1:55060 - "POST /audio_query?text=%E2%80%94%E2%80%94%E3%81%82&speaker=888753760 HTTP/1.1" 200 OK
# 「――が」
[2024/12/11 00:38:21] ERROR: Internal Server Error occurred.
Traceback (most recent call last):
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/middleware/errors.py", line 165, in __call__
await self.app(scope, receive, _send)
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/middleware/exceptions.py", line 62, in __call__
await wrap_app_handling_exceptions(self.app, conn)(scope, receive, send)
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/routing.py", line 715, in __call__
await self.middleware_stack(scope, receive, send)
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/routing.py", line 735, in app
await route.handle(scope, receive, send)
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/routing.py", line 288, in handle
await self.app(scope, receive, send)
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/routing.py", line 76, in app
await wrap_app_handling_exceptions(app, request)(scope, receive, send)
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/_exception_handler.py", line 53, in wrapped_app
raise exc
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/_exception_handler.py", line 42, in wrapped_app
await app(scope, receive, sender)
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/routing.py", line 73, in app
response = await f(request)
^^^^^^^^^^^^^^^^
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/fastapi/routing.py", line 301, in app
raw_response = await run_endpoint_function(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/fastapi/routing.py", line 214, in run_endpoint_function
return await run_in_threadpool(dependant.call, **values)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/starlette/concurrency.py", line 39, in run_in_threadpool
return await anyio.to_thread.run_sync(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/anyio/to_thread.py", line 56, in run_sync
return await get_async_backend().run_sync_in_worker_thread(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 2441, in run_sync_in_worker_thread
return await future
^^^^^^^^^^^^
File "~/Documents/AivisSpeech-Engine/.venv/lib/python3.11/site-packages/anyio/_backends/_asyncio.py", line 943, in run
result = context.run(func, *args)
^^^^^^^^^^^^^^^^^^^^^^^^
File "~/Documents/AivisSpeech-Engine/voicevox_engine/app/routers/tts_pipeline.py", line 100, in audio_query
accent_phrases = engine.create_accent_phrases(text, style_id)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/Documents/AivisSpeech-Engine/voicevox_engine/tts_pipeline/style_bert_vits2_tts_engine.py", line 424, in create_accent_phrases
accent_phrases = self._debug_create_accent_phrases(text)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "~/Documents/AivisSpeech-Engine/voicevox_engine/tts_pipeline/style_bert_vits2_tts_engine.py", line 324, in _debug_create_accent_phrases
if sep_phonemes_with_joshi_mora_index >= len(sep_phonemes_with_joshi[sep_phonemes_with_joshi_index]): # fmt: skip
~~~~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
IndexError: list index out of range
[2024/12/11 00:38:21] INFO: 127.0.0.1:55732 - "POST /audio_query?text=%E2%80%94%E2%80%94%E3%81%8C&speaker=888753760 HTTP/1.1" 500 Internal Server Error
再現手順
- Aivis-Project/AivisSpeech-Engine.git をクローンし、「開発環境の構築」を行う
- poetry run task serve を実行して API サーバーを起動する
- http://127.0.0.1:10101/docs にアクセスし、text: ――は, speaker: 888753760 として POST /audio_query を実行する
期待動作
- 500 エラーが発生せずに 200 を返す
AivisSpeech Engine のバージョン
1.1.0-dev(1490102)
OS の種類 / バージョン
- Windows
- macOS
- Linux
その他
少し調査したところ、以下の部分で要素数が合っていないことにより起こるようです。下記のような assertion を入れると AssertionError となりました。
assert len(mora_tone_list) == sum(map(len, sep_phonemes_with_joshi))
何例か試しましたが、全角ダッシュ2つ + 助詞?等(「は」「が」「で」)の場合に要素数が一致しなくなるようです。
参考までに調査時のログを掲載させて頂きます。問題解決の一助となれば幸いです。
# StyleBertVITS2TTSEngine._debug_create_accent_phrases("―は青")
mora_tone_list['-', 'ワ', 'ア', 'オ']
sep_kata_with_joshi=['-ワ', 'アオ']
sep_phonemes_with_joshi=[[(None, '-'), ('w', 'a')], [(None, 'a'), (None, 'o')]]
# StyleBertVITS2TTSEngine._debug_create_accent_phrases("――あ青")
mora_tone_list['-', '-', 'ア', 'ア', 'オ']
sep_kata_with_joshi=['--', 'ア', 'アオ']
sep_phonemes_with_joshi=[[(None, '-'), (None, '-')], [(None, 'a')], [(None, 'a'), (None, 'o')]]
# StyleBertVITS2TTSEngine._debug_create_accent_phrases("――は青")
mora_tone_list['-', '-', 'ワ', 'ア', 'オ']
sep_kata_with_joshi=['--ワ', 'アオ']
sep_phonemes_with_joshi=[[(None, '--'), ('w', 'a')], [(None, 'a'), (None, 'o')]]
以下が関係しているような気もするのですが、申し訳ありませんが詳しく調査できておりません。
- issue報告する
コードリーディング
# poetry run task serve
# python run.py
# app/routers/tts_pipeline.py
def generate_tts_pipeline_router()
@router.post("/audio_query") def audio_query()
accent_phrases = engine.create_accent_phrases(text, style_id)
# voicevox_engine/tts_pipeline/style_bert_vits2_tts_engine.py
class StyleBertVITS2TTSEngine(TTSEngine):
# g2p() を使うためには事前に以下が必要
dir = "/home/kmt/.local/share/AivisSpeech-Engine-Dev/BertModelCaches"
onnx_bert_models.load_tokenizer(
language=Languages.JP,
pretrained_model_name_or_path="tsukumijima/deberta-v2-large-japanese-char-wwm-onnx",
cache_dir=dir,
)
from voicevox_engine.tts_pipeline.style_bert_vits2_tts_engine import StyleBertVITS2TTSEngine
def test_sbv2_tts_engine_process_to_create_accent_phrases() -> None:
text = "――が"
accent_phrases = StyleBertVITS2TTSEngine.process_to_create_accent_phrases(text)
print(accent_phrases)
def create_accent_phrases(self, text: str, style_id: StyleId) -> list[AccentPhrase]
normalized_text = normalize_text(text.strip())
# style-bert-vits2 = { git = "https://github.com/tsukumijima/Style-Bert-VITS2", rev = "4bedc1e41c22cd04df296c9a3e38ca9898ad8e43" }
from style_bert_vits2.nlp.japanese.g2p import g2p
phones, tones, _, sep_kata_with_joshi = g2p(normalized_text, use_jp_extra=True, raise_yomi_error=False) # fmt: skip
def g2p(...):
...
# fork元と違い, `sep_kata_with_joshi` を作って返している
sep_text, sep_kata, sep_kata_with_joshi = text_to_sep_kata(norm_text, raise_yomi_error=raise_yomi_error)
return phones, tones, word2ph, sep_kata_with_joshi
# https://github.com/tsukumijima/Style-Bert-VITS2/blob/4bedc1e41c22cd04df296c9a3e38ca9898ad8e43/style_bert_vits2/nlp/japanese/g2p.py#L169
def text_to_sep_kata(...):
...
# この単語が助詞 or 助動詞のときは前の要素に連結
デバッグ
poetry run task test -s -k 'test_sbv2_tts_engine_process_to_create_accent_phrases'