Skip to content

Commit a5750b2

Browse files
committed
Redo the fix for incorrect detection of Unicode characters by the re module
1 parent 331bbbf commit a5750b2

File tree

1 file changed

+36
-31
lines changed

1 file changed

+36
-31
lines changed

adafruit_templateengine.py

Lines changed: 36 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -165,32 +165,34 @@ def safe_markdown(value: Any) -> str:
165165
_PRECOMPILED_TOKEN_PATTERN = re.compile(r"{{ .+? }}|{% .+? %}")
166166

167167

168-
def _find_next_extends(template: str):
168+
def _find_next_extends(template: bytes) -> "re.Match[bytes]":
169169
return _PRECOMPILED_EXTENDS_PATTERN.search(template)
170170

171171

172-
def _find_next_block(template: str):
172+
def _find_next_block(template: bytes) -> "re.Match[bytes]":
173173
return _PRECOMPILED_BLOCK_PATTERN.search(template)
174174

175175

176-
def _find_next_include(template: str):
176+
def _find_next_include(template: bytes) -> "re.Match[bytes]":
177177
return _PRECOMPILED_INCLUDE_PATTERN.search(template)
178178

179179

180-
def _find_named_endblock(template: str, name: str):
181-
return re.search(r"{% endblock " + name + r" %}", template)
180+
def _find_named_endblock(template: bytes, name: bytes) -> "re.Match[bytes]":
181+
return re.search(
182+
r"{% endblock ".encode("utf-8") + name + r" %}".encode("utf-8"), template
183+
)
182184

183185

184-
def _exists_and_is_file(path: str):
186+
def _exists_and_is_file(path: str) -> bool:
185187
try:
186188
return (os.stat(path)[0] & 0b_11110000_00000000) == 0b_10000000_00000000
187189
except OSError:
188190
return False
189191

190192

191-
def _resolve_includes(template: str):
193+
def _resolve_includes(template: bytes) -> bytes:
192194
while (include_match := _find_next_include(template)) is not None:
193-
template_path = include_match.group(0)[12:-4]
195+
template_path = include_match.group(0)[12:-4].decode("utf-8")
194196

195197
# TODO: Restrict include to specific directory
196198

@@ -201,19 +203,19 @@ def _resolve_includes(template: str):
201203
with open(template_path, "rt", encoding="utf-8") as template_file:
202204
template = (
203205
template[: include_match.start()]
204-
+ template_file.read()
206+
+ template_file.read().encode("utf-8")
205207
+ template[include_match.end() :]
206208
)
207209
return template
208210

209211

210-
def _check_for_unsupported_nested_blocks(template: str):
212+
def _check_for_unsupported_nested_blocks(template: bytes):
211213
if _find_next_block(template) is not None:
212214
raise ValueError("Nested blocks are not supported")
213215

214216

215-
def _resolve_includes_blocks_and_extends(template: str):
216-
block_replacements: "dict[str, str]" = {}
217+
def _resolve_includes_blocks_and_extends(template: bytes) -> bytes:
218+
block_replacements: "dict[bytes, bytes]" = {}
217219

218220
# Processing nested child templates
219221
while (extends_match := _find_next_extends(template)) is not None:
@@ -223,7 +225,7 @@ def _resolve_includes_blocks_and_extends(template: str):
223225
with open(
224226
extended_template_name, "rt", encoding="utf-8"
225227
) as extended_template_file:
226-
extended_template = extended_template_file.read()
228+
extended_template = extended_template_file.read().encode("utf-8")
227229

228230
# Removed the extend tag
229231
template = template[extends_match.end() :]
@@ -240,18 +242,13 @@ def _resolve_includes_blocks_and_extends(template: str):
240242
if endblock_match is None:
241243
raise ValueError(r"Missing {% endblock %} for block: " + block_name)
242244

243-
# Workaround for bug in re module https://github.com/adafruit/circuitpython/issues/6860
244-
block_content = template.encode("utf-8")[
245-
block_match.end() : endblock_match.start()
246-
].decode("utf-8")
247-
# TODO: Uncomment when bug is fixed
248-
# block_content = template[block_match.end() : endblock_match.start()]
245+
block_content = template[block_match.end() : endblock_match.start()]
249246

250247
_check_for_unsupported_nested_blocks(block_content)
251248

252249
if block_name in block_replacements:
253250
block_replacements[block_name] = block_replacements[block_name].replace(
254-
r"{{ block.super }}", block_content
251+
r"{{ block.super }}".encode("utf-8"), block_content
255252
)
256253
else:
257254
block_replacements.setdefault(block_name, block_content)
@@ -268,14 +265,16 @@ def _resolve_includes_blocks_and_extends(template: str):
268265
return _replace_blocks_with_replacements(template, block_replacements)
269266

270267

271-
def _replace_blocks_with_replacements(template: str, replacements: "dict[str, str]"):
268+
def _replace_blocks_with_replacements(
269+
template: bytes, replacements: "dict[bytes, bytes]"
270+
) -> bytes:
272271
# Replace blocks in top-level template
273272
while (block_match := _find_next_block(template)) is not None:
274273
block_name = block_match.group(0)[9:-3]
275274

276275
# Self-closing block tag without default content
277276
if (endblock_match := _find_named_endblock(template, block_name)) is None:
278-
replacement = replacements.get(block_name, "")
277+
replacement = replacements.get(block_name, "".encode("utf-8"))
279278

280279
template = (
281280
template[: block_match.start()]
@@ -300,7 +299,7 @@ def _replace_blocks_with_replacements(template: str, replacements: "dict[str, st
300299
# Replace default content with replacement
301300
else:
302301
replacement = replacements[block_name].replace(
303-
r"{{ block.super }}", block_content
302+
r"{{ block.super }}".encode("utf-8"), block_content
304303
)
305304

306305
template = (
@@ -312,15 +311,15 @@ def _replace_blocks_with_replacements(template: str, replacements: "dict[str, st
312311
return template
313312

314313

315-
def _find_next_hash_comment(template: str):
314+
def _find_next_hash_comment(template: bytes) -> "re.Match[bytes]":
316315
return _PRECOMPILED_HASH_COMMENT_PATTERN.search(template)
317316

318317

319-
def _find_next_block_comment(template: str):
318+
def _find_next_block_comment(template: bytes) -> "re.Match[bytes]":
320319
return _PRECOMPILED_BLOCK_COMMENT_PATTERN.search(template)
321320

322321

323-
def _remove_comments(template: str):
322+
def _remove_comments(template: bytes) -> bytes:
324323
# Remove hash comments: {# ... #}
325324
while (comment_match := _find_next_hash_comment(template)) is not None:
326325
template = template[: comment_match.start()] + template[comment_match.end() :]
@@ -332,7 +331,7 @@ def _remove_comments(template: str):
332331
return template
333332

334333

335-
def _find_next_token(template: str):
334+
def _find_next_token(template: bytes) -> "re.Match[bytes]":
336335
return _PRECOMPILED_TOKEN_PATTERN.search(template)
337336

338337

@@ -344,6 +343,10 @@ def _create_template_function( # pylint: disable=,too-many-locals,too-many-bran
344343
context_name: str = "context",
345344
dry_run: bool = False,
346345
) -> "Generator[str] | str":
346+
# Workaround for bug in re module https://github.com/adafruit/circuitpython/issues/6860
347+
# TODO: Remove .encode() and .decode() when bug is fixed
348+
template: bytes = template.encode("utf-8")
349+
347350
# Resolve includes, blocks and extends
348351
template = _resolve_includes_blocks_and_extends(template)
349352

@@ -360,10 +363,10 @@ def _create_template_function( # pylint: disable=,too-many-locals,too-many-bran
360363

361364
# Resolve tokens
362365
while (token_match := _find_next_token(template)) is not None:
363-
token = token_match.group(0)
366+
token: str = token_match.group(0).decode("utf-8")
364367

365368
# Add the text before the token
366-
if text_before_token := template[: token_match.start()]:
369+
if text_before_token := template[: token_match.start()].decode("utf-8"):
367370
function_string += (
368371
indent * indentation_level + f"yield {repr(text_before_token)}\n"
369372
)
@@ -452,9 +455,11 @@ def _create_template_function( # pylint: disable=,too-many-locals,too-many-bran
452455
# Continue with the rest of the template
453456
template = template[token_match.end() :]
454457

455-
# Add the text after the last token (if any) and return
458+
# Add the text after the last token (if any)
456459
if template:
457-
function_string += indent * indentation_level + f"yield {repr(template)}\n"
460+
function_string += (
461+
indent * indentation_level + f"yield {repr(template.decode('utf-8'))}\n" #
462+
)
458463

459464
# If dry run, return the template function string
460465
if dry_run:

0 commit comments

Comments
 (0)