code_utils

Stata-related helper functions that do not depend on Jupyter or pystata

These are mostly intended for use in the noecho module.

from fastcore.test import test_eq, ExceptionExpected
from textwrap import dedent

Handling Stata comments and #delimit;


source

remove_comments

 remove_comments (code)
remove_comments('tab size if inlist(rt_from_to, "A10TOU to B10", "E19 to B19")  //"E20 to B20", ", sort')
'tab size if inlist(rt_from_to, "A10TOU to B10", "E19 to B19")  '
test_eq(
    remove_comments(dedent("""\
        disp ///
        1""")),
    "disp 1")

Correctly ignores “///” when not preceded by a space:

test_eq(
    remove_comments(dedent("""\
        disp///
        1
        """)),
    dedent("""\
        disp///
        1
        """)
)
test_eq(
    remove_comments(dedent("""\
        /*
        blah
        blah
        */
        list var
        """)),
    """\

list var
"""
)

source

ends_in_comment_block

 ends_in_comment_block (code)
ends_in_comment_block('tab size /*if ')
True
code = 'tab size /*if */\n*'
code[code.rfind('*/')+2:]
'\n*'
test_eq(ends_in_comment_block('tab size /*if '), True)
test_eq(ends_in_comment_block('tab size /*if */'), False)
test_eq(ends_in_comment_block('tab size /*if */\n*'), False)
test_eq(ends_in_comment_block('tab size /*if */\n//'), False)
delimit_regex.split(dedent("""\
disp 3
#delimit cr
disp 1
disp 2
"""), maxsplit=1)
['disp 3\n', ' cr', '\ndisp 1\ndisp 2\n']

source

_replace_delimiter

 _replace_delimiter (code, sc_delimiter=False)

Note: the following more-complicated regex would detect valid delimiters plus macros:

delimit_regex = re.compile(r'#delimit( |\t)+(;|cr|`.+\'|\$_.+|\$.+)')

but that’s unnecessary, since Stata’s #delimit x interprets any x other than ‘cr’ as switching the delimiter to ‘;’.

test_eq(
    _replace_delimiter(dedent("""\
        list var1
        #delimit;
        list var2;list var3;
        list
        var4;
        """)),
    dedent("""\
        list var1
         list var2
        list var3
         list var4
        """)
)
test_eq(_replace_delimiter(dedent("""\
    disp "start"
    #delimit;
    disp "hello"; disp "hello2";
    disp 
        "hello2a";
    #delimit cr
    disp "hello3"
    disp "hello4"
    #delimit;""")), 
        dedent("""\
    disp "start"
    disp "hello"
     disp "hello2"
     disp      "hello2a"
     disp "hello3"
    disp "hello4"
    """))
_replace_delimiter(dedent("""\
disp 3
#delimit cr
disp 1
disp 2
"""), sc_delimiter=True)
''
test_eq(_replace_tabs("\tsum"), "    sum")

source

valid_single_line_code

 valid_single_line_code (code)
test_eq(valid_single_line_code('tab size if inlist(rt_from_to, "A10TOU to B10")  // E20'), 
        'tab size if inlist(rt_from_to, "A10TOU to B10")  ')
test_eq(valid_single_line_code('#delimit ;'), 
        '')
test_eq(valid_single_line_code('#delimit cr'), 
        '')

source

ending_sc_delimiter

 ending_sc_delimiter (code, sc_delimiter=False)
test_eq(
    ending_sc_delimiter(dedent("""\
        list var1
        #delimit;
        list var2;list var3;
        """)),
    True)
test_eq(
    ending_sc_delimiter(dedent("""\
        /*
        #delimit;
        */
        disp 1
        disp 2""")),
    False)

source

standardize_code

 standardize_code (code, sc_delimiter=False)

Remove comments spanning multiple lines and replace custom delimiters

test_eq(
    standardize_code(dedent("""\
        list var1
        #delimit;
        list var2; list var3;
        list
        var4;
        """)), 
    dedent("""\
        list var1
         list var2
         list var3
         list var4""")
)
test_eq(
    standardize_code(dedent("""\
        /*
        blah
        blah
        */
        list var
        """)), 
    "list var")
test_eq(
    standardize_code(dedent("""\
        disp /// comment
        1
        """)),
    "disp 1")
test_eq(standardize_code("list    var"), "list var")
standardize_code('''\
display "displayed1"
/*
display "displayed2"
*/
display "displayed3"''')
'display "displayed1"\ndisplay "displayed3"'

Detect version command


source

ending_code_version

 ending_code_version (code, sc_delimiter=False, code_version=None,
                      stata_version='17.0')

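Based on my trial and error, it seems that Stata’s version command (as of version 17.0) accepts any number between 1 and your Stata version (inclusive) with up to two decimal places. In the tests below, ending_code_version returns None when the code contains no valid version command or when the requested version matches stata_version; otherwise it returns the requested version as a string, with a trailing “.0” dropped (e.g., “15.0” becomes “15”).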

test_eq(
    ending_code_version(dedent("#delimit ;")),
    None)
test_eq(
    ending_code_version(dedent(" version 15")),
    "15")
test_eq(
    ending_code_version(dedent("version 15.0")),
    "15")
test_eq(
    ending_code_version(dedent("version 15.1")),
    "15.1")
test_eq(
    ending_code_version(dedent("version 15.141")),
    None)
test_eq(
    ending_code_version(dedent("version 23")),
    None)
test_eq(
    ending_code_version(dedent("version 0.7")),
    None)
test_eq(
    ending_code_version(dedent("version 17")),
    None)
test_eq(
    ending_code_version(dedent("version 17.0")),
    None)
test_eq(
    ending_code_version(dedent("version 17.0"), stata_version="17.00"),
    None)
test_eq(
    ending_code_version(dedent("version 18.0"), stata_version="18.00"),
    None)
test_eq(
    ending_code_version(dedent("version 18.0"), stata_version="18.10"),
    "18")

Check for specific commands in std_code

test_eq(bool(local_def_in(" sysuse auto")), False)
test_eq(bool(local_def_in(" loc auto=1")), True)
test_eq(bool(local_def_in("qui n cap local auto=1")), True)
test_eq(bool(local_def_in("list local auto")), False)
test_eq(bool(local_def_in("tempfile file1")), True)
test_eq(bool(local_def_in(" capture token file1")), True)
test_eq(bool(local_def_in("mata: st_local(test1, 2)")), True)
test_eq(bool(local_def_in("levelsof var1")), True)
test_eq(bool(preserve_restore_in("sysuse auto")), False)
test_eq(bool(preserve_restore_in("preserve")), True)
test_eq(bool(preserve_restore_in("preserve\nkeep in 1")), True)
test_eq(bool(preserve_restore_in("restore,")), True)
test_eq(bool(preserve_restore_in("count\nrestore")), True)
test_eq(bool(preserve_restore_in("gen restore=1")), False)

Separate out Stata program code

Stata program definitions are separated out because such code (as well as python/mata blocks) is unsuitable for run_as_program.


source

is_start_of_program_block

 is_start_of_program_block (std_code_line)
test_eq(is_start_of_program_block("capture noisily program test_program"), True)
test_eq(is_start_of_program_block(" capture noisily list var"), False)
test_eq(is_start_of_program_block("pr l display1"), False)

source

break_out_prog_blocks

 break_out_prog_blocks (code, sc_delimiter=False)
test_eq(
    break_out_prog_blocks(dedent('''\
        capture program drop ender
        program define ender
            disp "ender output"
        end
        capture program drop display2
        program define display2
            ender
        end
        display2
        ''')),
    [{'is_prog': False, 'std_code': 'capture program drop ender'},
     {'is_prog': True,
      'std_code': 'program define ender\n    disp "ender output"\nend'},
     {'is_prog': False, 'std_code': 'capture program drop display2'},
     {'is_prog': True, 'std_code': 'program define display2\n    ender\nend'},
     {'is_prog': False, 'std_code': 'display2'}]
)