Source code for segram.symbols

"""Symbols are used for denoting discrete enumerable types
of entities of a given class, for instance semantic roles
such as subjects or direct objects of a verb.

The advantage of using symbols, implemented based on :class:`enum.Flag`,
is that they can be combined and filtered using binary
boolean operators.
"""
from typing import Self
from enum import Flag, auto

# Names exported by a star-import of this module.
# NOTE(review): "Modal" is not defined in the visible part of this module —
# confirm that it is defined elsewhere in the file.
__all__ = ("POS", "Role", "Tense", "Modal", "Mood")


class Symbol(Flag):
    """Base class for symbols."""
    # pylint: disable=function-redefined,invalid-overridden-method
    def __str__(self) -> str:
        if self.value == 0:
            return ""
        return super().__str__().split(".")[-1].lower()

    @property
    def name(self) -> str:
        return str(self).lower() or None

    @classmethod
    def from_name(cls, name: str) -> Self:
        neg = False
        if name.startswith("~"):
            name = name[1:]
            neg = True
        parts = name.split("|")
        sym =  cls(0)
        for part in parts:
            sym |= getattr(cls, part)
        return ~sym if neg else sym


class POS(Symbol):
    """Part-of-speech symbols based on the Universal Dependencies tagset.

    The tag definitions can be found
    `here <https://universaldependencies.org/u/pos/>`_.
    """
    # Open class words
    ADJ = auto()
    ADV = auto()
    INTJ = auto()
    NOUN = auto()
    PROPN = auto()
    VERB = auto()
    # Closed class words
    ADP = auto()
    AUX = auto()
    CCONJ = auto()
    DET = auto()
    NUM = auto()
    PART = auto()
    PRON = auto()
    SCONJ = auto()
    # Other
    PUNCT = auto()
    SYM = auto()
    X = auto()
    # Combinations and aliases
    OTHER = X

    @classmethod
    def from_name(cls, name: str) -> Self:
        """Build a symbol from ``name`` after converting it to upper case."""
        upper_name = name.upper()
        return super().from_name(upper_name)
class Role(Symbol):
    """Syntactic role symbols.

    Unlike POS tags (and standard syntactic dependencies),
    syntactic roles are not fixed but may be phrase-specific.
    Thus, the same token can play different syntactic roles
    in different phrases, e.g. it can be a direct object
    in one verb phrase and a subject in another.

    Moreover, roles are used for marking tokens whose functions
    cannot be simply determined by their POS or dependency tags,
    e.g. negations, even though such roles may often be fixed
    and not phrase-specific.

    Notes
    -----
    Currently, roles are used primarily for printing purposes,
    i.e. coloring tokens in console outputs, and therefore only
    a selection of the most important roles is defined.
    Moreover, it is still an open question what roles should be defined.

    Attributes
    ----------
    VERB
        Verb or a verb-like predicate.
    NOUN
        Noun, typically the head of a noun phrase.
    SUBJ
        Subject (active, passive, nominal or clausal etc.).
    DOBJ
        Direct object of a verb or a description.
    IOBJ
        Indirect object of a verb (dative).
    PREP
        Preposition.
    POBJ
        Object of a preposition.
    PROOT
        Root of a preposition.
    DESC
        Description. This includes adjectives, adverbs,
        and adjectival and adverbial modifiers as well
        as any other sort of construction used to directly
        describe nouns, verbs and prepositions.
    BG
        Background element that should not be emphasized visually
        when printing, e.g. printed in gray.
    NEG
        Negation.
    QMARK
        Question mark.
    EXCLAM
        Exclamation mark.
    INTJ
        Interjection.
    """
    # Component-specific roles
    VERB = auto()
    NOUN = auto()
    SUBJ = auto()
    DOBJ = auto()
    IOBJ = auto()
    PREP = auto()
    POBJ = auto()
    PROOT = auto()
    DESC = auto()
    BG = auto()
    # Fixed roles
    NEG = auto()
    QMARK = auto()
    EXCLAM = auto()
    INTJ = auto()

    @classmethod
    def from_name(cls, name: str) -> Self:
        """Build a role from ``name`` after converting it to upper case."""
        return super().from_name(name.upper())
class Dep(Symbol):
    """Component dependency symbols.

    Attributes
    ----------
    root
        Sentence root.
    subj
        Subject.
    dobj
        Direct object.
    iobj
        Indirect object.
    pobj
        Prepositional object.
    prep
        Preposition.
    subcl
        Subclause.
    relcl
        Relative clause.
    acl
        Clausal modifier of noun (adnominal clause).
    xcomp
        Open clausal complement.
    desc
        Description.
    adesc
        Adjectival complement description.
    cdesc
        Clausal description.
    nmod
        Modifier of nominal.
    appos
        Appositional modifier.
    agent
        Agent token (introducing passive subjects).
    conj
        Conjunct.
    misc
        Miscellaneous (all other dependency roles).
    """
    # pylint: disable=invalid-name
    root = auto()
    subj = auto()
    dobj = auto()
    iobj = auto()
    pobj = auto()
    prep = auto()
    subcl = auto()
    relcl = auto()
    acl = auto()
    xcomp = auto()
    desc = auto()
    adesc = auto()
    cdesc = auto()
    nmod = auto()
    appos = auto()
    agent = auto()
    conj = auto()
    misc = auto()

    @classmethod
    def from_name(cls, name: str) -> Self:
        """Build a dependency from ``name`` after converting it to lower case."""
        return super().from_name(name.lower())

    @property
    def role(self) -> Role | None:
        """Role corresponding to the dependency, if there is one.

        For a combined symbol the component names are checked in order
        and the first one that maps to a role wins. ``None`` is returned
        when no component maps to a role or when the symbol is empty.
        """
        # pylint: disable=too-many-return-statements
        label = self.name
        if label is None:
            # Empty symbol: there is no dependency name to map.
            return None
        for name in label.split("|"):
            if "subj" in name:
                return Role.SUBJ
            if "obj" in name:
                try:
                    return Role.from_name(name)
                except AttributeError:
                    # No role with that name; try the next component.
                    continue
            if "desc" in name:
                return Role.DESC
            if name.endswith("cl"):
                return Role.VERB
            if name in ("nmod", "appos"):
                return Role.NOUN
            if name in ("agent", "prep"):
                return Role.PREP
        return None
class Tense(Symbol):
    """Tense symbols.

    Attributes
    ----------
    PAST
        Past tense.
    PRESENT
        Present tense.
    FUTURE
        Future tense.
    """
    PAST = auto()
    PRESENT = auto()
    FUTURE = auto()

    @classmethod
    def from_name(cls, name: str) -> Self:
        """Build a tense from ``name`` after converting it to upper case."""
        return super().from_name(name.upper())
class Mood(Symbol):
    """Grammatical mood.

    Currently only a few selected moods are implemented.

    Attributes
    ----------
    REAL
        Standard indicative mood (realis).
    IMP
        Imperative mood.
    """
    REAL = auto()
    IMP = auto()

    @classmethod
    def from_name(cls, name: str) -> Self:
        """Build a mood from ``name`` after converting it to upper case."""
        return super().from_name(name.upper())