I'm a newbie to spaCy and I've read the docs about token-based matching. I've tried the spaCy Matcher with the REGEX operator, but I don't get any results.
When I use the re library to do the match it works though.
Am I doing something wrong in the code?
I'm trying to match the word "accès'd".
Thanks for your help
# REGEX
import re

# Sample sentence; the word to find, "accès'd", sits at the very start.
text = "accès'd est ferme aujpourd'hui"
# Raw string so any backslash added to the pattern later is never read as a
# string escape; re.I lets the trailing "D" also match a lowercase "d".
pattern_re = re.compile(r"^acc?é?e?è?s?s?'?D", re.I)
# Keep the result and print it: outside an interactive session a bare
# expression statement displays nothing.
match_re = pattern_re.match(text)
print(match_re)
# <re.Match object; span=(0, 7), match="accès'd">
# REGEX SPACY VERSION 1
import re

import spacy

nlp = spacy.load("fr_core_news_sm")
doc = nlp(text)

# A Matcher pattern such as {'TEXT': {'REGEX': ...}} is tested against one
# token at a time and takes no flags argument, so the uppercase "D" would need
# an inline "(?i)" to match "d".
# NOTE(review): the French tokenizer presumably splits "accès'd" at the
# apostrophe, so no single token contains the whole word -- confirm with
# [token.text for token in doc].
# Matching on the full text and mapping the hit back to tokens works however
# the word was split.
accesd_re = re.compile(r"^acc?é?e?è?s?s?'?D", re.I)

for hit in accesd_re.finditer(doc.text):
    start_char, end_char = hit.span()
    # char_span() returns None when the character offsets do not line up with
    # token boundaries; skip such hits instead of failing on None.
    span = doc.char_span(start_char, end_char, label="AccèsD")
    if span is None:
        continue
    # Same fields as a Matcher result: label hash, label string, start token
    # index, end token index, matched text.
    print(span.label, span.label_, span.start, span.end, span.text)
# REGEX SPACY VERSION 2
import re

import spacy
from spacy.matcher import Matcher

nlp = spacy.load("fr_core_news_sm")

# Compiled once at module level instead of on every call of the flag function.
ACCESD_RE = re.compile(r"^acc?é?e?è?s?s?'?D", re.I)


def accesd_flag(text):
    """Return True when *text* starts with a spelling variant of "accès'd"."""
    return bool(ACCESD_RE.match(text))


# Register the predicate as a custom boolean lexical flag and match on it.
IS_ACCESD = nlp.vocab.add_flag(accesd_flag)
pattern = [{IS_ACCESD: True}]
matcher = Matcher(nlp.vocab)
matcher.add('AccèsD', None, pattern)

doc = nlp(text)
matches = matcher(doc)
for match_id, start, end in matches:
    # match_id is a hash; look the rule name back up in the string store.
    match_string = nlp.vocab.strings[match_id]
    span = doc[start:end]
    print(match_id, match_string, start, end, span.text)
# NOTHING
# NOTE(review): the flag is evaluated on each token's text separately. If the
# tokenizer splits "accès'd" at the apostrophe (check with
# [token.text for token in doc]), no single token satisfies the regex, which
# would explain the empty result. Matching ACCESD_RE against doc.text and
# converting the offsets with doc.char_span() avoids that dependency.
Pass an on_match function instead of None; then you can use it to help you debug.