from fastcore.test import test_eq, ExceptionExpected
from textwrap import dedent
code_utils
Stata-related helper functions with no Jupyter or pystata dependence
These are mostly intended for use in the noecho module.
Handling Stata comments and #delimit;
remove_comments
remove_comments (code)
remove_comments('tab size if inlist(rt_from_to, "A10TOU to B10", "E19 to B19") //"E20 to B20", ", sort')
'tab size if inlist(rt_from_to, "A10TOU to B10", "E19 to B19") '
test_eq(remove_comments(dedent("""\
disp ///
1""")),
    "disp 1")
Correctly ignores “///” when not preceded by a space:
test_eq(remove_comments(dedent("""\
disp///
1
""")),
    dedent("""\
disp///
1
""")
)
test_eq(remove_comments(dedent("""\
/*
blah
blah
*/
list var
""")),
    """\
list var
"""
)
ends_in_comment_block
ends_in_comment_block (code)
ends_in_comment_block('tab size /*if ')
True
code = 'tab size /*if */\n*'
code[code.rfind('*/')+2:]
'\n*'
test_eq(ends_in_comment_block('tab size /*if '), True)
test_eq(ends_in_comment_block('tab size /*if */'), False)
test_eq(ends_in_comment_block('tab size /*if */\n*'), False)
test_eq(ends_in_comment_block('tab size /*if */\n//'), False)
delimit_regex.split(dedent("""\
disp 3
#delimit cr
disp 1
disp 2
"""), maxsplit=1)
['disp 3\n', ' cr', '\ndisp 1\ndisp 2\n']
_replace_delimiter
_replace_delimiter (code, sc_delimiter=False)
Note: the following more-complicated regex would detect valid delimiters plus macros:
delimit_regex = re.compile(r'#delimit( |\t)+(;|cr|`.+\'|\$_.+|\$.+)')
but that’s unnecessary, since Stata’s #delimit x interprets any x other than ‘cr’ as switching the delimiter to ‘;’.
test_eq(_replace_delimiter(dedent("""\
list var1
#delimit;
list var2;list var3;
list
var4;
""")),
    dedent("""\
list var1
list var2
list var3
list var4
""")
)
test_eq(_replace_delimiter(dedent("""\
disp "start"
#delimit;
disp "hello"; disp "hello2";
disp
"hello2a";
#delimit cr
disp "hello3"
disp "hello4"
#delimit;""")),
    dedent("""\
disp "start"
disp "hello"
disp "hello2"
disp "hello2a"
disp "hello3"
disp "hello4"
"""))
_replace_delimiter(dedent("""\
disp 3
#delimit cr
disp 1
disp 2
"""), sc_delimiter=True)
''
test_eq(_replace_tabs("\tsum"), " sum")
valid_single_line_code
valid_single_line_code (code)
test_eq(valid_single_line_code('tab size if inlist(rt_from_to, "A10TOU to B10") // E20'),
    'tab size if inlist(rt_from_to, "A10TOU to B10") ')
test_eq(valid_single_line_code('#delimit ;'),
    '')
test_eq(valid_single_line_code('#delimit cr'),
    '')
ending_sc_delimiter
ending_sc_delimiter (code, sc_delimiter=False)
test_eq(ending_sc_delimiter(dedent("""\
list var1
#delimit;
list var2;list var3;
""")),
    True)
test_eq(ending_sc_delimiter(dedent("""\
/*
#delimit;
*/
disp 1
disp 2""")),
    False)
standardize_code
standardize_code (code, sc_delimiter=False)
Remove comments spanning multiple lines and replace custom delimiters
test_eq(standardize_code(dedent("""\
list var1
#delimit;
list var2; list var3;
list
var4;
""")),
    dedent("""\
list var1
list var2
list var3
list var4""")
)
test_eq(standardize_code(dedent("""\
/*
blah
blah
*/
list var
""")),
    "list var")
test_eq(standardize_code(dedent("""\
disp /// comment
1
""")),
    "disp 1")
test_eq(standardize_code("list var"), "list var")
standardize_code('''\
display "displayed1"
/*
display "displayed2"
*/
display "displayed3"''')
'display "displayed1"\ndisplay "displayed3"'
Detect version command
ending_code_version
ending_code_version (code, sc_delimiter=False, code_version=None, stata_version='17.0')
Based on my trial and error, it seems that Stata’s version command (as of version 17.0) accepts any number between 1 and your Stata version (inclusive) with up to two decimal places.
test_eq(ending_code_version(dedent("#delimit ;")), None)
test_eq(ending_code_version(dedent(" version 15")), "15")
test_eq(ending_code_version(dedent("version 15.0")), "15")
test_eq(ending_code_version(dedent("version 15.1")), "15.1")
test_eq(ending_code_version(dedent("version 15.141")), None)
test_eq(ending_code_version(dedent("version 23")), None)
test_eq(ending_code_version(dedent("version 0.7")), None)
test_eq(ending_code_version(dedent("version 17")), None)
test_eq(ending_code_version(dedent("version 17.0")), None)
test_eq(ending_code_version(dedent("version 17.0"), stata_version="17.00"), None)
test_eq(ending_code_version(dedent("version 18.0"), stata_version="18.00"), None)
test_eq(ending_code_version(dedent("version 18.0"), stata_version="18.10"), "18")
Check for specific commands in std_code
test_eq(bool(local_def_in(" sysuse auto")), False)
test_eq(bool(local_def_in(" loc auto=1")), True)
test_eq(bool(local_def_in("qui n cap local auto=1")), True)
test_eq(bool(local_def_in("list local auto")), False)
test_eq(bool(local_def_in("tempfile file1")), True)
test_eq(bool(local_def_in(" capture token file1")), True)
test_eq(bool(local_def_in("mata: st_local(test1, 2)")), True)
test_eq(bool(local_def_in("levelsof var1")), True)
test_eq(bool(preserve_restore_in("sysuse auto")), False)
test_eq(bool(preserve_restore_in("preserve")), True)
test_eq(bool(preserve_restore_in("preserve\nkeep in 1")), True)
test_eq(bool(preserve_restore_in("restore,")), True)
test_eq(bool(preserve_restore_in("count\nrestore")), True)
test_eq(bool(preserve_restore_in("gen restore=1")), False)
Separate out Stata program code
…because such code (as well as python/mata blocks) is unsuitable for run_as_program
is_start_of_program_block
is_start_of_program_block (std_code_line)
test_eq(is_start_of_program_block("capture noisily program test_program"), True)
test_eq(is_start_of_program_block(" capture noisily list var"), False)
test_eq(is_start_of_program_block("pr l display1"), False)
break_out_prog_blocks
break_out_prog_blocks (code, sc_delimiter=False)
test_eq(break_out_prog_blocks(dedent('''\
capture program drop ender
program define ender
disp "ender output"
end
capture program drop display2
program define display2
ender
end
display2
''')),
    [{'is_prog': False, 'std_code': 'capture program drop ender'},
     {'is_prog': True, 'std_code': 'program define ender\n disp "ender output"\nend'},
     {'is_prog': False, 'std_code': 'capture program drop display2'},
     {'is_prog': True, 'std_code': 'program define display2\n ender\nend'},
     {'is_prog': False, 'std_code': 'display2'}]
)