我打算使用 spaCy NLP 引擎,並想先從詞典着手。我已閱讀過這份資料(this resource)和另一份資料(this),但仍然不知道該如何開始。如何爲 spaCy NLP 創建詞典?
我有這樣的代碼:
from spacy.en import English
import _regex
# Create the English parser object once; the same `parser` is called on every example text below.
parser = English()
# Test data: four sentences assembled from adjacent string literals.
# Every fragment that ends a sentence carries a trailing space; without it the
# literals fuse into "flying.The knack" / "miss.In the", which removes the
# whitespace boundary between sentences before the text is ever parsed.
multiSentence = (
    "There is an art, it says, or rather, a knack to flying. "
    "The knack lies in learning how to throw yourself at the ground and miss. "
    "In the beginning the Universe was created. This has made a lot of people "
    "very angry and been widely regarded as a bad move."
)
# Parse the sample text, then print a set of attributes for the first three
# tokens only. Most attributes are printed in two forms: the plain name and
# the same name with a trailing underscore.
parsedData = parser(multiSentence)
for position, token in enumerate(parsedData):
    print("original:", token.orth, token.orth_)
    print("lowercased:", token.lower, token.lower_)
    print("lemma:", token.lemma, token.lemma_)
    print("shape:", token.shape, token.shape_)
    print("prefix:", token.prefix, token.prefix_)
    print("suffix:", token.suffix, token.suffix_)
    print("log probability:", token.prob)
    print("Brown cluster id:", token.cluster)
    print("----------------------------------------")
    # Positions 0, 1 and 2 have been printed once the index reaches 2.
    if position >= 2:
        break
# Let's look at the sentences.
# For every sentence span, walk the token indices from span.start up to (but
# not including) span.end, concatenate each token's `string`, and strip the
# surrounding whitespace from the result.
sents = [
    ''.join(parsedData[idx].string for idx in range(span.start, span.end)).strip()
    for span in parsedData.sents
]
print('To show sentence')
for sentence in sents:
    print(sentence)
# Let's look at the part of speech tags of the first sentence.
# `next(iter(...))` takes the first sentence span directly instead of starting
# a loop only to break out of it; the span is then iterated token by token, so
# no index-based copy of its tokens is needed.
sent = next(iter(parsedData.sents))
for token in sent:
    print(token.orth_, token.pos_)
# Let's look at the dependencies of this example:
example = "The boy with the spotted dog quickly ran after the firetruck."
parsedEx = parser(example)
# One line per token, shown as:
# original token, dependency tag, head word, left dependents, right dependents
for token in parsedEx:
    left_dependents = [child.orth_ for child in token.lefts]
    right_dependents = [child.orth_ for child in token.rights]
    print(token.orth_, token.dep_, token.head.orth_, left_dependents, right_dependents)
# Let's look at the named entities of this example:
example = "Apple's stocks dropped dramatically after the death of Steve Jobs in October."
parsedEx = parser(example)
for token in parsedEx:
    # An empty entity type string is replaced by a readable placeholder.
    entity_type = token.ent_type_ or "(not an entity)"
    print(token.orth_, entity_type)
print("-------------- entities only ---------------")
# If you just want the entities and nothing else, you can access the parsed
# example's "ents" property like this:
ents = list(parsedEx.ents)
for entity in ents:
    entity_text = ' '.join(t.orth_ for t in entity)
    print(entity.label, entity.label_, entity_text)
# Run the same parser over informal text (slang, an emoticon, "gr8", "8/8")
# and print each token's text, part-of-speech tag and lemma.
messyData = "lol that is rly funny :) This is gr8 i rate it 8/8!!!"
parsedData = parser(messyData)
for token in parsedData:
    text, pos_tag, lemma = token.orth_, token.pos_, token.lemma_
    print(text, pos_tag, lemma)
我可以在哪裏更改這些 token 屬性(token.orth、token.orth_ 等等)的值:
# Excerpt of the per-token print statements from the script: each line prints a
# label followed by one or two attributes of `token`. `token` is not defined in
# this excerpt; in the full script it is the loop variable over the parsed text.
print("original:", token.orth, token.orth_)
print("lowercased:", token.lower, token.lower_)
print("lemma:", token.lemma, token.lemma_)
print("shape:", token.shape, token.shape_)
print("prefix:", token.prefix, token.prefix_)
print("suffix:", token.suffix, token.suffix_)
print("log probability:", token.prob)
print("Brown cluster id:", token.cluster)
我可以把這些 token 保存到自己的詞典中嗎?感謝您的幫助。
你能否進一步說明一下,你希望從這個詞典中獲得什麼? – alvas