Source on GitHub

Module paroxython.list_programs

Scan a directory for Python programs and yield the corresponding Program objects.

Expand source code Browse GitHub
from pathlib import Path
from typing import Iterator

import regex  # type: ignore

from .preprocess_source import Cleanup, centrifugate_hints, collect_hints, remove_hints
from .user_types import Program, ProgramPath, Programs, Source

def list_programs(
    directory: Path,
    cleanup_strategy: str = "full",
    glob_pattern: str = "",
    skip_pattern: str = "",
) -> Programs:
    result: Programs = []
    cleanup = Cleanup(cleanup_strategy)
    glob_pattern = glob_pattern or "**/*.py"
    skip_pattern = skip_pattern or r"(__init__|setup|.*[-_]tests?)\.py"
    match_excluded = regex.compile(skip_pattern).fullmatch
    for path in sorted(directory.glob(glob_pattern)):
        if not match_excluded(
            source =
            relative_path = path.relative_to(directory)
            result.append(get_program(source, relative_path))
    return result

def get_program(source: Source, relative_path: Path = None) -> Program:
    source = centrifugate_hints(source)
    (addition, deletion) = collect_hints(source)
    source = remove_hints(source)
    return Program(
        path=ProgramPath(str(relative_path or Path())),

def iterate_and_print_programs(programs: Programs) -> Iterator[Program]:
    """Yield every program of the list in order, echoing its rank and path on stdout.

    Each status line starts with a carriage return, overwrites the previous
    status with spaces, returns to the start of the line again and then writes
    the 1-based rank followed by the program path. Nothing is printed for an
    empty list.

    Args:
        programs: The programs to iterate on; must support `len()`.

    Yields:
        The same objects, in the same order.
    """
    eraser = ""  # Run of spaces used to wipe the status written on the previous turn.
    for (rank, current) in enumerate(programs, start=1):
        status = f"{rank: 5} {current.path}"
        print(end=f"\r{eraser}\r{status}", flush=True)
        # Rank field (5 columns) + separator + path, plus one spare column.
        eraser = " " * (len(current.path) + 7)
        is_last = rank == len(programs)
        if is_last:
            # The final wipe has to happen before the last yield: code placed
            # after the loop would not run if the consumer stops at that point.
            print(end=f"\r{eraser}\r", flush=True)
        yield current

if __name__ == "__main__":
    datetime = __import__("datetime").datetime
    directory = Path("../Algo/programs/")
    for program in list_programs(
        path = directory / program.path
        print("-" * 80)


def list_programs(directory: pathlib.Path,
cleanup_strategy: str = 'full',
glob_pattern: str = '',
skip_pattern: str = '',
) ‑> List[Program]

List (by default recursively) all Python programs of a given directory.


directory : Path
The directory to search.
cleanup_strategy : str, optional
Describes how to clean the source codes. Passed to Cleanup. Defaults to "full".
glob_pattern : str, optional
Describes which files to yield in directory. Passed to the standard library pathlib's Path.glob(). If empty, replaced by "**/*.py", which means “all Python source files in directory and all its subdirectories, recursively”. Defaults to "".
skip_pattern : str, optional
Describes how to filter out the yielded files. If empty, skip any file whose name is "__init__.py", "setup.py" or ends with "-test.py", "-tests.py", "_test.py" or "_tests.py". Defaults to "".
Ignored keyword arguments.


A list of Program objects as constructed by get_program().
Expand source code Browse GitHub
def list_programs(
    directory: Path,
    cleanup_strategy: str = "full",
    glob_pattern: str = "",
    skip_pattern: str = "",
) -> Programs:
    result: Programs = []
    cleanup = Cleanup(cleanup_strategy)
    glob_pattern = glob_pattern or "**/*.py"
    skip_pattern = skip_pattern or r"(__init__|setup|.*[-_]tests?)\.py"
    match_excluded = regex.compile(skip_pattern).fullmatch
    for path in sorted(directory.glob(glob_pattern)):
        if not match_excluded(
            source =
            relative_path = path.relative_to(directory)
            result.append(get_program(source, relative_path))
    return result
def get_program(source: Source,
relative_path: pathlib.Path = None
) ‑> Program

Construct a fresh Program object from its source code and (optionally) relative path.


At this stage, the source code is already preprocessed by paroxython.preprocess_source, which among other things means it has been stripped of all its comments, except those consisting of one or several manual hints. The following operations are then carried out:

  1. Centrifugate the all-encompassing hints found in the source code (see centrifugate_hints()).
  2. Collect all hints, determining whether they must be added to or removed from the labels which will later be found by labelled_programs().
  3. Remove all hints from the source code.
  4. Return a new Program (details below).


source : Source
A source code, already preprocessed.
relative_path : Path, optional
A path relative to the directory passed to list_programs(). Can be omitted for testing purposes or when the source code is the contents of a Jupyter Notebook's cell. Defaults to None.


A NamedTuple consisting of the following fields:
  • path (type ProgramPath, derived from str): the program path, either empty or relative to the directory passed to list_programs();
  • source (type Source, derived from str): its source code, fully cleaned up;
  • addition (type Dict[LabelName, List[Span]]): the manual hints scheduled for addition;
  • deletion (type Dict[LabelName, List[Span]]): the manual hints scheduled for deletion;
  • labels (type List[Label], where each Label consists of a label name and a list of spans). This list is created empty here, to be later populated by labelled_programs().
  • taxa (type List[Taxon], where each Taxon consists of a taxon name and a bag of spans). This list is created empty here, to be later calculated from the labels by Taxonomy.
Expand source code Browse GitHub
def get_program(source: Source, relative_path: Path = None) -> Program:
    source = centrifugate_hints(source)
    (addition, deletion) = collect_hints(source)
    source = remove_hints(source)
    return Program(
        path=ProgramPath(str(relative_path or Path())),
def iterate_and_print_programs(programs: List[Program]) ‑> Iterator[Program]

Iterate on a list of programs while printing their names as a side effect.


programs : Programs
A list of Program objects, as returned by list_programs().


The same Program objects in the same order.


This simple wrapper should print the name of each program over the previous one to avoid flooding the screen. Unfortunately, this may result in even more flooding if your console does not support ANSI escape codes.

Expand source code Browse GitHub
def iterate_and_print_programs(programs: Programs) -> Iterator[Program]:
    """Pass the given programs through unchanged while showing progress on stdout.

    For each program, the previous status is blanked out with spaces and
    replaced (via carriage returns) by the 1-based position and the path of
    the program about to be yielded. An empty list produces no output.

    Args:
        programs: A sized collection of objects exposing a `path` attribute.

    Yields:
        Each element of `programs`, in its original order.
    """
    padding = ""  # Spaces covering whatever status was printed last.
    for (position, item) in enumerate(programs, start=1):
        counter = f"{position: 5}"
        print(end=f"\r{padding}\r{counter} {item.path}", flush=True)
        # Width of the status: 5-column counter, one space, the path, one spare column.
        padding = " " * (len(item.path) + 7)
        if position == len(programs):
            # Clear the line before yielding the last item, since statements
            # after the loop would be skipped if the consumer stops there.
            print(end=f"\r{padding}\r", flush=True)
        yield item