I am currently working on a project where I'm attempting to use Word2Vec in combination with Multinomial Naive Bayes (MultinomialNB) for accuracy calculations.
import pandas as pd
import numpy as np, sys
from sklearn.model_selection import train_test_split
from gensim.models import Word2Vec
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, precision_score
from datasets import load_dataset
df = load_dataset('celsowm/bbc_news_ptbr', split='train')
X = df['texto']
y = df['categoria']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
sentences = [sentence.split() for sentence in X_train]
w2v_model = Word2Vec(sentences, vector_size=100, window=5, min_count=5, workers=4)
def vectorize(sentence):
words = sentence.split()
words_vecs = [w2v_model.wv[word] for word in words if word in w2v_model.wv]
if len(words_vecs) == 0:
return np.zeros(100)
words_vecs = np.array(words_vecs)
return words_vecs.mean(axis=0)
X_train = np.array([vectorize(sentence) for sentence in X_train])
X_test = np.array([vectorize(sentence) for sentence in X_test])
clf = MultinomialNB()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print('Accuracy:', accuracy_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred, pos_label='positive'))
However, I've encountered an error:
ValueError("Negative values in data passed to %s" % whom)
ValueError: Negative values in data passed to MultinomialNB (input X)
I would appreciate any insights into resolving this issue.
from Negative values in data passed to MultinomialNB when vectorize using Word2Vec
No comments:
Post a Comment