Source code for segram.nlp.grammar.components

# pylint: disable=abstract-method
from typing import Any, Iterable, Sequence, ClassVar
from abc import abstractmethod
from .grammar import GrammarNLP
from ..tokens import Token
from ...grammar import Component
from ...symbols import POS, Role, Dep


class ComponentNLP(GrammarNLP, Component):
    """Abstract base class for grammar components
    with NLP backend methods.

    Subclasses are validated at class-creation time (see
    :meth:`__init_subclass__`): for every name in ``token_names`` a
    ``find_<name>`` attribute must exist, and for every name in
    ``attr_names`` a ``get_<name>`` attribute must exist.

    Attributes
    ----------
    post_init
        Names of token slots whose finders are skipped during the scan of
        the head token's children in :meth:`from_tok` and are instead
        called with the constructed component.
    inherit_from_lead
        Names of token slots that :meth:`from_tok` looks up among the
        children of ``tok.lead`` when they were not found among the
        children of ``tok`` itself.
    """
    __slots__ = ()
    __post_init__ = ()
    __inherit_from_lead__ = ()
    post_init: ClassVar[tuple[str, ...]] = ()
    inherit_from_lead: ClassVar[tuple[str, ...]] = ()

    def __init_subclass__(cls) -> None:
        """Initialize class attributes and check discovery methods.

        Raises
        ------
        TypeError
            If any ``find_<name>`` (for ``cls.token_names``) or
            ``get_<name>`` (for ``cls.attr_names``) attribute is missing.
        """
        super().__init_subclass__()
        # ``init_class_attrs`` is defined outside this class; it receives
        # the mapping from dunder names to their public counterparts.
        cls.init_class_attrs({
            "__inherit_from_lead__": "inherit_from_lead",
            "__post_init__": "post_init"
        }, check_slots=False)
        # (label used in the error message, method prefix, required names)
        requirements = (
            ("component", "find", cls.token_names),
            ("attr", "get", cls.attr_names),
        )
        for typ, prefix, names in requirements:
            expected = (f"{prefix}_{n}" for n in names)
            missing = [meth for meth in expected if not hasattr(cls, meth)]
            if missing:
                raise TypeError(f"missing '{typ}' discovery methods: {missing}")

    # Abstract methods --------------------------------------------------------

    @classmethod
    @abstractmethod
    def is_head(cls, tok: Token) -> bool:
        """Test for head token."""
        raise NotImplementedError

    @abstractmethod
    def get_dep(self, parent: Component) -> Dep | None:
        """Get dependency between ``self`` and ``parent``."""
        raise NotImplementedError

    # Methods -----------------------------------------------------------------

    def is_child_of(self, comp: Component) -> bool:
        """Is ``self`` a child of ``comp``.

        True only when the head of ``comp`` is an ancestor of the head of
        ``self``, is equal to the head of that head token (``head.head``),
        and the head of ``self`` is not a root token.
        """
        if not comp.head.is_ancestor(self.head):
            return False
        return comp.head == self.head.head and not self.head.is_root

    def find_parents(self, comps: Sequence[Component]) -> Iterable[Component]:
        """Find parents of ``self`` contained in ``comps``.

        Yields
        ------
        Component
            Every element of ``comps``, in order, for which
            :meth:`is_child_of` is true.
        """
        for comp in comps:
            if self.is_child_of(comp):
                yield comp

    def get_sconj(self, parent: Component) -> Token | None:
        """Get conjunction subordinating ``self`` to ``parent``.

        Returns the first token in ``self.subtokens`` with a true
        ``is_sconj`` flag, provided that the head of the head token of
        ``self`` equals the head of ``parent``; otherwise ``None``.
        """
        if self.head.head == parent.head:
            return next((tok for tok in self.subtokens if tok.is_sconj), None)
        return None

    def get_cconj(self, other: Component) -> Token | None:
        """Get conjunction token coordinating ``self`` and ``other``.

        Returns ``None`` when the head of ``other`` is not among the
        conjuncts of the head of ``self``. Otherwise the children of the
        head of ``self`` and then of each of its conjuncts are scanned in
        order and the first child with a true ``is_cconj`` flag is
        returned (``None`` if there is no such child).
        """
        conjs = self.head.conjuncts
        if other.head not in conjs:
            return None
        candidates = (
            child
            for conj in (self.head, *conjs)
            for child in conj.children
            if child.is_cconj
        )
        return next(candidates, None)

    @classmethod
    def from_tok(
        cls,
        tok: Token,
        pos: POS | None = None,
        role: Role | None = None,
        **kwds: Any
    ) -> Component | None:
        """Construct from a token.

        Parameters
        ----------
        tok
            NLP token object.
        pos
            POS tag to assign to the tok token.
            Determined automatically if ``None``.
            Within this method the value is only forwarded when the
            construction is delegated to another component type.
        role
            Syntactic role assigned to the tok token
            and the entire component.
            Falls back to ``cls.__role__`` if falsy; string values are
            converted with ``Role.from_name``.
        **kwds
            Passed to :meth:`__init__`.

        Returns
        -------
        Component or None
            ``None`` when ``cls.is_head(tok)`` is false. Otherwise the
            constructed component, or the result of ``from_tok`` of the
            type returned by ``cls.get_comp_type(role, tok.pos)`` when
            that type is not ``cls``.
        """
        # pylint: disable=too-many-branches
        # pylint: disable=too-many-locals
        if not cls.is_head(tok):
            return None

        def add_tok(found, name, slots):
            # Store a finder result under ``name`` and return it if it is
            # non-empty; return ``None`` otherwise. Iterable results are
            # accumulated in a list, other truthy results are stored as-is.
            if isinstance(found, Iterable):
                found = tuple(found)
                if found:
                    slots.setdefault(name, []).extend(found)
                    return found
                return None
            if found:
                slots[name] = found
                return found
            return None

        role = role or cls.__role__
        if isinstance(role, str):
            role = Role.from_name(role)
        # Delegate to a different component class if one is selected
        # for this role and the POS of the token.
        typ = cls.get_comp_type(role, tok.pos)
        if typ is not cls:
            return typ.from_tok(tok, pos, role=role, **kwds)

        slots = {}
        finders = {
            name: getattr(cls, f"find_{name}")
            for name in cls.token_names
            if name not in cls.post_init
        }
        for child in tok.children:
            for name, finder in finders.items():
                # A slot already holding a single token is not revisited;
                # list-valued slots may keep accumulating.
                if (v := slots.get(name)) and isinstance(v, Token):
                    continue
                # The first finder that accepts the child claims it.
                if add_tok(finder(child), name, slots):
                    break
        # Apply finders to lead children for missing tokens -------------------
        if tok != (lead := tok.lead):
            for name in cls.inherit_from_lead:
                if name in slots:
                    continue
                finder = getattr(cls, f"find_{name}")
                # NOTE(review): this takes the finder result for the FIRST
                # child of ``lead`` only (or ``None`` if there are no
                # children), even when that result is empty -- confirm this
                # is intended rather than "first non-empty result".
                slots[name] = next((finder(c) for c in lead.children), None)
        comp = cls(tok, role=role, **slots, **kwds)
        # Apply post-init finders ---------------------------------------------
        comp_cls = comp.__class__
        for name in comp_cls.post_init:
            finder = getattr(comp_cls, f"find_{name}")
            # Post-init finders receive the component, not a child token.
            found = add_tok(finder(comp), name, slots)
            setattr(comp, name, found)
        # Get and set attributes ----------------------------------------------
        for attr in cls.attr_names:
            # Attributes passed explicitly through ``kwds`` are left alone.
            if attr not in kwds:
                val = getattr(cls, f"get_{attr}")(comp)
                setattr(comp, attr, val)
        return comp