# Source code for chatterbot.tagging

from typing import List, Union, Tuple
from chatterbot import languages
from chatterbot.utils import get_model_for_language
import spacy


class LowercaseTagger(object):
    """
    Returns the text in lowercase.
    """

    def __init__(self, language=None):
        # Imported for its side effect of registering the
        # 'chatterbot_lowercase_indexer' spacy pipeline component.
        from chatterbot.components import chatterbot_lowercase_indexer  # noqa

        self.language = language or languages.ENG

        # Create a new empty spacy nlp object for the target language;
        # no statistical model is loaded since only lowercasing is needed.
        self.nlp = spacy.blank(self.language.ISO_639_1)

        self.nlp.add_pipe(
            'chatterbot_lowercase_indexer',
            name='chatterbot_lowercase_indexer',
            last=True
        )

    def get_text_index_string(self, text: Union[str, List[str]]):
        """
        Return the lowercase search-index string for ``text``.

        :param text: A single string, or a list of strings.
        :returns: One search-index string for a single input, or a list of
            search-index strings for a list input.
        """
        if isinstance(text, list):
            # Batch processing is more efficient than calling nlp per item
            documents = self.nlp.pipe(text)
            return [document._.search_index for document in documents]
        else:
            document = self.nlp(text)
            return document._.search_index

    def as_nlp_pipeline(self, texts: Union[List[str], List[Tuple[str, dict]]]):
        """
        Accepts a list of strings, or a list of tuples where the first
        element is the text and the second element is a dictionary of
        context to return alongside the generated document.

        :returns: The (lazy) generator of processed spacy documents.
        """
        # Coerce to a real bool: with an empty ``texts`` the original
        # ``texts and ...`` expression would yield the empty list itself.
        process_as_tuples = bool(texts) and isinstance(texts[0], tuple)

        documents = self.nlp.pipe(texts, as_tuples=process_as_tuples)

        return documents
class PosLemmaTagger(object):
    """
    Tags text as a string of part-of-speech, lemma pairs
    produced by a loaded spacy language model.
    """

    def __init__(self, language=None):
        # Imported for its side effect of registering the
        # 'chatterbot_bigram_indexer' spacy pipeline component.
        from chatterbot.components import chatterbot_bigram_indexer  # noqa

        self.language = language or languages.ENG

        model = get_model_for_language(self.language)

        # Disable the Named Entity Recognition (NER) component because it is not necessary
        self.nlp = spacy.load(model, exclude=['ner'])

        self.nlp.add_pipe(
            'chatterbot_bigram_indexer',
            name='chatterbot_bigram_indexer',
            last=True
        )

    def get_text_index_string(self, text: Union[str, List[str]]):
        """
        Return a string of text containing part-of-speech, lemma pairs.

        :param text: A single string, or a list of strings.
        :returns: One search-index string for a single input, or a list of
            search-index strings for a list input.
        """
        if isinstance(text, list):
            # Batch processing is more efficient than calling nlp per item
            documents = self.nlp.pipe(text)
            return [document._.search_index for document in documents]
        else:
            document = self.nlp(text)
            return document._.search_index

    def as_nlp_pipeline(self, texts: Union[List[str], List[Tuple[str, dict]]]):
        """
        Accepts a list of strings, or a list of tuples where the first
        element is the text and the second element is a dictionary of
        context to return alongside the generated document.

        :returns: The (lazy) generator of processed spacy documents.
        """
        # Coerce to a real bool: with an empty ``texts`` the original
        # ``texts and ...`` expression would yield the empty list itself.
        process_as_tuples = bool(texts) and isinstance(texts[0], tuple)

        documents = self.nlp.pipe(texts, as_tuples=process_as_tuples)

        return documents