# Source code for kimodo.sanitize
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
"""Text prompt sanitization for motion generation (whitespace, punctuation, capitalization)."""
# [docs]
def sanitize_text(text: str, paragraph: bool = True) -> str:
    """Sanitize a text prompt: strip, collapse spaces, capitalize, trim non-alphanumeric, add/fix final punctuation.

    Leading non-alphanumeric characters are removed, as are trailing ones except
    the closing punctuation ``.!?"])'``. A period is appended when the result does
    not already end with ``.``, ``!`` or ``?``.

    Note:
        ``str.capitalize`` is applied to the whole prompt, so every character
        after the first one is lowercased.

    Args:
        text: Input text prompt.
        paragraph: If True, capitalize after each sentence break and normalize spacing between sentences.

    Returns:
        Sanitized text. An empty string is returned when the prompt contains no
        alphanumeric character (this includes empty and whitespace-only prompts).
    """
    # https://stackoverflow.com/a/1546251
    # str.split() without arguments drops leading/trailing whitespace and splits
    # on any whitespace run, so joining with " " both strips and collapses.
    text = " ".join(text.split())

    # Remove leading non-alphanumeric characters. Without any alphanumeric
    # character there is nothing meaningful to keep, so return an empty prompt
    # instead of leaking stray punctuation such as "-.".
    first = next((i for i, c in enumerate(text) if c.isalnum()), None)
    if first is None:
        return ""

    # Capitalize (note: this also lowercases the rest of the text).
    text = text[first:].capitalize()

    final_punctuations = ".!?\"])'"
    # Remove trailing non-alphanumeric characters, except final punctuations.
    # The scan always stops: the text starts with an alphanumeric character.
    last = len(text) - 1
    while not (text[last].isalnum() or text[last] in final_punctuations):
        last -= 1
    text = text[: last + 1]

    # Add a period at the end if needed.
    if text[-1] not in ".!?":
        text += "."

    if paragraph:
        text = _normalize_sentence_breaks(text)
    return text


def _normalize_sentence_breaks(text: str) -> str:
    """Put one space after each sentence break and capitalize the character that follows it."""
    import re  # function-scope import so the module needs no extra top-level import

    # A sentence break is a run of ".", "!" or "?" (so "?!" and "..." stay
    # together). A "." with a digit on both sides is a decimal point, not a break.
    # Whitespace around the break is consumed by the pattern and therefore dropped.
    parts = re.split(r"\s*((?:\.(?!\d)|(?<!\d)\.|[!?])+)\s*", text)

    rebuilt = []
    for index, part in enumerate(parts):
        if index % 2:
            # Odd entries are the captured punctuation runs: put back one space.
            rebuilt.append(part + " ")
        else:
            # Even entries are sentences: only capitalize the first character.
            rebuilt.append(part[:1].capitalize() + part[1:])
    # The text always ends with a break, which leaves one extra space at the end.
    return "".join(rebuilt).strip()
# [docs]
def sanitize_texts(texts: list[str]) -> list[str]:
    """Apply sanitize_text to every prompt of a list.

    Args:
        texts: List of input text prompts.

    Returns:
        A new list with the sanitized prompts, in the same order as the input.
    """
    cleaned = []
    for prompt in texts:
        # Each prompt is sanitized with the default (paragraph) settings.
        cleaned.append(sanitize_text(prompt))
    return cleaned
if __name__ == "__main__":
    # Small manual demo: print a few raw prompts, then their sanitized versions.
    examples = [
        "   A person is walking.",
        "someone go forward",
        "jump",
        "jumping!",
        "jumping)",
        "-go",
        "blocasdji -----",
        "",
    ]

    print("Old texts")
    print("\n".join(examples))
    print()

    sanitized = sanitize_texts(examples)
    print("Sanitized texts")
    print("\n".join(sanitized))