Source code for segram.nlp.tokens.token

# pylint: disable=too-many-public-methods,no-name-in-module
from typing import Any, Iterable, Self
from abc import abstractmethod
from spacy.tokens import MorphAnalysis, Token as SpacyToken
from .abc import NLP
from ...symbols import POS, Role
from ...utils.colors import color_role
from ...utils.diff import iter_diffs, equal, IDiffType


class Token(NLP):
    """Token wrapper class.

    Most members are thin accessors around the wrapped token stored in
    ``self.tok``. Values that are themselves spaCy objects (document,
    sentence, related tokens) are passed through ``self.sns`` before being
    returned.

    Subclasses must implement the abstract ``is_*`` flags, which drive
    :attr:`role`.
    """
    __slots__ = ()

    def __repr__(self) -> str:
        """Return the colored string representation (see :meth:`to_str`)."""
        return self.to_str(color=True)

    def __hash__(self) -> int:
        """Reuse the parent hash.

        Defined explicitly because a class that overrides ``__eq__`` without
        defining ``__hash__`` gets ``__hash__`` set to ``None``.
        """
        return super().__hash__()

    def __eq__(self, other: Self) -> bool:
        """Equal when the parent comparison holds and indices match.

        ``NotImplemented`` coming from the parent comparison is propagated
        unchanged.
        """
        if (res := super().__eq__(other)) is NotImplemented:
            return res
        return res and self.i == other.i

    def __lt__(self, other: Self) -> bool:
        """Is ``self`` earlier in the document than ``other``.

        Returns ``NotImplemented`` when ``self.is_comparable_with(other)``
        is false.
        """
        if self.is_comparable_with(other):
            return self.i < other.i
        return NotImplemented

    # Abstract properties -----------------------------------------------------

    @property
    @abstractmethod
    def is_negation(self) -> bool:
        """Flag which makes :attr:`role` return ``Role.NEG``."""

    @property
    @abstractmethod
    def is_qmark(self) -> bool:
        """Flag which makes :attr:`role` return ``Role.QMARK``."""

    @property
    @abstractmethod
    def is_exclam(self) -> bool:
        """Flag which makes :attr:`role` return ``Role.EXCLAM``."""

    @property
    @abstractmethod
    def is_intj(self) -> bool:
        """Flag which makes :attr:`role` return ``Role.INTJ``."""

    # Properties --------------------------------------------------------------

    @property
    def i(self) -> int:
        """Index of the wrapped token (``tok.i``)."""
        return self.tok.i

    @property
    def whitespace(self) -> str:
        """Trailing whitespace of the wrapped token (``tok.whitespace_``)."""
        return self.tok.whitespace_

    @property
    def whitespace_(self) -> str:
        """Same value as :attr:`whitespace`."""
        return self.tok.whitespace_

    @property
    def text_with_ws(self) -> str:
        """Value of ``tok.text_with_ws``."""
        return self.tok.text_with_ws

    @property
    def pos(self) -> POS:
        """Part of speech, built with ``POS.from_name`` from ``tok.pos_``."""
        return POS.from_name(self.tok.pos_)

    @property
    def role(self) -> Role | None:
        """Fixed token role or ``None`` when no flag is set.

        Flags are checked in a fixed order and the first truthy one wins:
        negation, question mark, exclamation mark, interjection.
        """
        if self.is_negation:
            return Role.NEG
        if self.is_qmark:
            return Role.QMARK
        if self.is_exclam:
            return Role.EXCLAM
        if self.is_intj:
            return Role.INTJ
        return None

    @property
    def dep(self) -> str:
        """Value of ``tok.dep_``."""
        return self.tok.dep_

    @property
    def tag(self) -> str:
        """Value of ``tok.tag_``."""
        return self.tok.tag_

    @property
    def lemma(self) -> str:
        """Value of ``tok.lemma_``."""
        return self.tok.lemma_

    @property
    def ent(self) -> str:
        """Value of ``tok.ent_type_``."""
        return self.tok.ent_type_

    @property
    def ent_tag(self) -> str:
        """``tok.ent_iob_`` followed by ``-<ent_type_>`` when the type is non-empty."""
        tag = self.tok.ent_iob_
        if (typ := self.tok.ent_type_):
            tag += "-"+typ
        return tag

    @property
    def doc(self) -> "Doc":
        """Document of the wrapped token, passed through ``self.sns``."""
        return self.sns(self.tok.doc)

    @property
    def sent(self) -> "Span":
        """Sentence of the wrapped token, passed through ``self.sns``."""
        return self.sns(self.tok.sent)

    @property
    def head(self) -> Self:
        """Head of the wrapped token, passed through ``self.sns``."""
        return self.sns(self.tok.head)

    @property
    def morph(self) -> MorphAnalysis:
        """Value of ``tok.morph`` (returned as is)."""
        return self.tok.morph

    @property
    def left_edge(self) -> Self:
        """``tok.left_edge`` passed through ``self.sns``."""
        return self.sns(self.tok.left_edge)

    @property
    def right_edge(self) -> Self:
        """``tok.right_edge`` passed through ``self.sns``."""
        return self.sns(self.tok.right_edge)

    @property
    def ancestors(self) -> Iterable[Self]:
        """Lazily yield ``tok.ancestors``, each passed through ``self.sns``."""
        for tok in self.tok.ancestors:
            yield self.sns(tok)

    @property
    def conjuncts(self) -> tuple[Self, ...]:
        """Tuple of ``tok.conjuncts``, each passed through ``self.sns``."""
        return tuple(self.sns(c) for c in self.tok.conjuncts)

    @property
    def children(self) -> Iterable[Self]:
        """Lazily yield ``tok.children``, each passed through ``self.sns``."""
        for child in self.tok.children:
            yield self.sns(child)

    @property
    def lefts(self) -> Iterable[Self]:
        """Lazily yield ``tok.lefts``, each passed through ``self.sns``."""
        for tok in self.tok.lefts:
            yield self.sns(tok)

    @property
    def rights(self) -> Iterable[Self]:
        """Lazily yield ``tok.rights``, each passed through ``self.sns``."""
        for tok in self.tok.rights:
            yield self.sns(tok)

    @property
    def subtree(self) -> Iterable[Self]:
        """Lazily yield ``tok.subtree``, each passed through ``self.sns``."""
        for tok in self.tok.subtree:
            yield self.sns(tok)

    @property
    def corefs(self) -> tuple[Self, ...]:
        """Coreferred tokens or an empty tuple.

        References are read from the ``<alias>_corefs`` attribute of
        ``self._`` and used to index :attr:`doc`. A missing or empty
        attribute gives ``()``.
        """
        # pylint: disable=protected-access,redefined-outer-name
        if (refs := getattr(self._, f"{self.alias}_corefs", None)):
            return tuple(self.doc[ref] for ref in refs)
        return ()

    @property
    def coref(self) -> Self:
        """Return main coreferred token or self."""
        if (refs := self.corefs):
            return refs[0]
        return self

    # Methods -----------------------------------------------------------------

    def to_str(
        self,
        *,
        color: bool = False,
        **kwds: Any
    ) -> str:
        """Represent as a string.

        The token text is followed by a bracketed, comma-separated list of
        its coreferred tokens (rendered without colors) when there are any.

        Parameters
        ----------
        color
            Use colors.
        **kwds
            Passed to :func:`~segram.utils.colors.color_role`.
            They can be used to override the fixed token role
            with contextual roles using ``role`` keyword argument.
        """
        suffix = ""
        if (corefs := self.corefs):
            listed = ",".join(ref.to_str(color=False) for ref in corefs)
            suffix = f"[{listed}]"
            if kwds.get("role") is Role.BG:
                # ``kwds`` already carries ``role=Role.BG`` at this point.
                # NOTE(review): ``color`` is not forwarded in this call, so
                # the suffix relies on the default of ``color_role`` - confirm
                # this is intended.
                suffix = color_role(suffix, **kwds)
        # A caller-supplied ``role`` takes precedence over the fixed role.
        kwds = {"role": self.role, **kwds}
        return f"{color_role(self.text, color=color, **kwds)}{suffix}"

    def nbor(self, *args: Any, **kwds: Any) -> Self:
        """Call ``tok.nbor`` with the given arguments and wrap the result."""
        return self.sns(self.tok.nbor(*args, **kwds))

    def is_ancestor(self, other: SpacyToken | Self) -> bool:
        """Delegate to ``tok.is_ancestor``.

        ``other`` may be a spaCy token or a :class:`Token` wrapper;
        wrappers are replaced with their ``tok`` before the call.
        """
        if isinstance(other, Token):
            other = other.tok
        return self.tok.is_ancestor(other)
# Register comparison functions for testing -----------------------------------

@equal.register
def _(obj: Token, other: Token, *, strict: bool = True) -> bool:
    """Compare two tokens: same document (per ``equal``) and same index."""
    same_doc = equal(obj.doc, other.doc, strict=strict)
    # Indices are only compared when the documents are considered equal.
    return same_doc and (obj.i == other.i)


@iter_diffs.register
def _(obj: Token, other: Token, *, strict: bool = True) -> IDiffType:
    """Yield a single ``("TOKEN", obj, other)`` item when the tokens differ."""
    if equal(obj, other, strict=strict):
        return
    yield "TOKEN", obj, other