Module `paroxython.label_programs`

List all the programs of a given directory, with their field labels populated.

Expand source code Browse GitHub

from pathlib import Path
from typing import Callable, Iterator, List, Set

import regex  # type: ignore

from .list_programs import iterate_and_print_programs, list_programs
from .parse_program import ProgramParser
from .user_types import Label, LabelName, Programs, Source


def labelled_programs(
    directory: Path,
    search_imported_program_path: Callable = regex.compile(r"import(?:_module)?:([^:]*)").search,
    print_performances: bool = False,
    **kwargs,
) -> Programs:
    """Walk a directory, label every program found in it, and return them.

    Labels which denote the import of an "internal" module (i.e., a program
    belonging to the returned list) are renamed: "_internally" is inserted
    before the first colon, and the dots are replaced by slashes.

    Args:
        directory: The directory to walk.
        search_imported_program_path: Callable applied to each label name. When
            it returns a match, its first group is taken as the dotted name of
            the imported module. Not meant to be provided explicitly.
        print_performances: If True, finish by calling the parser's
            `print_performances()` method.
        **kwargs: Forwarded verbatim to `list_programs()`.

    Returns:
        The listed programs, with their `labels` field populated in place.
    """
    programs: Programs = list_programs(directory, **kwargs)

    # Express the program paths with the import separator, so that they can be
    # compared with the module names captured in the labels. The bare ".py"
    # entry accounts for `from . import foobar`, whose module name is "".
    known_modules = {".py"} | {item.path.replace("/", ".") for item in programs}

    parser = ProgramParser()
    print(f"Labelling {len(programs)} programs.")
    for program in iterate_and_print_programs(programs):
        # Slice assignment: the existing list object is filled, not replaced.
        program.labels[:] = parser(program)
        for index, label in enumerate(program.labels):
            match = search_imported_program_path(label.name)
            if not match or f"{match[1]}.py" not in known_modules:
                continue
            # Mark the import as internal, then go back to the path separator.
            internal_name = label.name.replace(":", "_internally:", 1).replace(".", "/")
            program.labels[index] = Label(name=LabelName(internal_name), spans=label.spans)

    if print_performances:  # pragma: no cover
        parser.print_performances()
    return programs


def generate_labelled_sources(programs: Programs) -> Iterator:
    """Yield, for each program, a header and then its source annotated with its labels.

    The header is a three-line comment giving the program path between two
    rules. In the source, every line on which at least one label span starts
    is suffixed with a comment listing the names of these labels in sorted
    order. A span covering several lines is flagged with " (-> +N)", where N
    is the difference between its end and its start.

    Args:
        programs: The labelled programs to render.

    Yields:
        Two `Source` strings per program: the header, then the annotated
        source, terminated by a newline when it is not empty.
    """
    rule = "-" * 88
    for program in programs:
        yield Source(f"# {rule}\n# {program.path}\n# {rule}")
        source_lines = program.source.splitlines()
        if source_lines:  # an empty source has no line to annotate
            # One set per line, so that duplicate annotations are merged.
            notes_per_line: List[Set[str]] = [set() for _ in source_lines]
            for label in program.labels:
                for span in label.spans:
                    extent = span.end - span.start
                    if extent:
                        note = f"{label.name} (-> +{extent})"
                    else:
                        note = f"{label.name}"
                    # Span line numbers are 1-based, list indices 0-based.
                    notes_per_line[span.start - 1].add(note)
            source_lines = [
                f"{line} # {', '.join(sorted(notes))}" if notes else line
                for line, notes in zip(source_lines, notes_per_line)
            ]
        yield Source("\n".join(source_lines + [""]))


if __name__ == "__main__":
    # Script entry point: label the programs found under docs/resources, then
    # print each generated header and annotated source in turn.
    programs = labelled_programs(Path("docs/resources"))
    for result in generate_labelled_sources(programs):
        print(result)

Functions

def labelled_programs(directory: pathlib.Path, search_imported_program_path: Callable = <built-in method search of regex>, print_performances: bool = False, **kwargs ) ‑> List[Program]

Walk a given directory, label all its programs, and return a list of them.

Args

directory : Path: The directory to walk, containing some Python programs.
search_imported_program_path : Callable, optional: A function which takes a label name and, if it starts with "import:" or "import_module:", returns a match object whose first group is the name of the imported program. Not to be explicitly provided.
print_performances : bool: If True, ends with a call to ProgramParser.print_performances(). Defaults to False.
**kwargs: May include the keyword arguments cleanup_strategy, skip_pattern, glob_pattern, transmitted to list_programs().

Note

In addition to creating a list of programs with a populated labels field, tweak all the labels which mark the import of an “internal” module, i.e., a program belonging to this very list. For instance, a label "import:my_program" would be transformed into "import_internally:my_program", while "import:itertools" would be left untouched.

Expand source code Browse GitHub

def labelled_programs(
    directory: Path,
    search_imported_program_path: Callable = regex.compile(r"import(?:_module)?:([^:]*)").search,
    print_performances: bool = False,
    **kwargs,
) -> Programs:
    """Walk a directory, label every program found in it, and return them.

    Labels which denote the import of an "internal" module (i.e., a program
    belonging to the returned list) are renamed: "_internally" is inserted
    before the first colon, and the dots are replaced by slashes.

    Args:
        directory: The directory to walk.
        search_imported_program_path: Callable applied to each label name. When
            it returns a match, its first group is taken as the dotted name of
            the imported module. Not meant to be provided explicitly.
        print_performances: If True, finish by calling the parser's
            `print_performances()` method.
        **kwargs: Forwarded verbatim to `list_programs()`.

    Returns:
        The listed programs, with their `labels` field populated in place.
    """
    programs: Programs = list_programs(directory, **kwargs)

    # Express the program paths with the import separator, so that they can be
    # compared with the module names captured in the labels. The bare ".py"
    # entry accounts for `from . import foobar`, whose module name is "".
    known_modules = {".py"} | {item.path.replace("/", ".") for item in programs}

    parser = ProgramParser()
    print(f"Labelling {len(programs)} programs.")
    for program in iterate_and_print_programs(programs):
        # Slice assignment: the existing list object is filled, not replaced.
        program.labels[:] = parser(program)
        for index, label in enumerate(program.labels):
            match = search_imported_program_path(label.name)
            if not match or f"{match[1]}.py" not in known_modules:
                continue
            # Mark the import as internal, then go back to the path separator.
            internal_name = label.name.replace(":", "_internally:", 1).replace(".", "/")
            program.labels[index] = Label(name=LabelName(internal_name), spans=label.spans)

    if print_performances:  # pragma: no cover
        parser.print_performances()
    return programs

def generate_labelled_sources(programs: List[Program]) ‑> Iterator

For each program, yield its source with its labels in comment.

Args

programs : Programs: A list of labelled programs.

Yields

Iterator: For each program, a header comment giving its path, then its source text, in which every line where a label starts ends with a comment listing these labels.

Note

This function is for testing purposes only. See an example of the result in labelled_sources.py.

Expand source code Browse GitHub

def generate_labelled_sources(programs: Programs) -> Iterator:
    """Yield, for each program, a header and then its source annotated with its labels.

    The header is a three-line comment giving the program path between two
    rules. In the source, every line on which at least one label span starts
    is suffixed with a comment listing the names of these labels in sorted
    order. A span covering several lines is flagged with " (-> +N)", where N
    is the difference between its end and its start.

    Args:
        programs: The labelled programs to render.

    Yields:
        Two `Source` strings per program: the header, then the annotated
        source, terminated by a newline when it is not empty.
    """
    rule = "-" * 88
    for program in programs:
        yield Source(f"# {rule}\n# {program.path}\n# {rule}")
        source_lines = program.source.splitlines()
        if source_lines:  # an empty source has no line to annotate
            # One set per line, so that duplicate annotations are merged.
            notes_per_line: List[Set[str]] = [set() for _ in source_lines]
            for label in program.labels:
                for span in label.spans:
                    extent = span.end - span.start
                    if extent:
                        note = f"{label.name} (-> +{extent})"
                    else:
                        note = f"{label.name}"
                    # Span line numbers are 1-based, list indices 0-based.
                    notes_per_line[span.start - 1].add(note)
            source_lines = [
                f"{line} # {', '.join(sorted(notes))}" if notes else line
                for line, notes in zip(source_lines, notes_per_line)
            ]
        yield Source("\n".join(source_lines + [""]))