# Data: link to the Drive folder with the data.
# NOTE(review): the link target itself is not present in this text.

# What we are going to need in order to run the code:
import random
import re
import sys
from optparse import OptionParser
from time import time

import matplotlib.pyplot as plt
import numpy as np
from nltk.tokenize import TweetTokenizer
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression as LR
from sklearn.naive_bayes import BernoulliNB, ComplementNB, MultinomialNB

# Fix both random number generators so that runs are reproducible.
np.random.seed(42069)
random.seed(69420)


# Useful helper functions that I am not going to stop to explain in detail:
def sort_coo(coo_matrix):
    """Return ``(col, data)`` pairs of *coo_matrix* ordered from highest to lowest.

    Args:
        coo_matrix: Object exposing parallel ``col`` and ``data`` sequences
            (presumably a ``scipy.sparse`` COO matrix, e.g. one TF-IDF row
            converted with ``.tocoo()`` -- confirm against the caller).

    Returns:
        A list of ``(col, data)`` tuples sorted in descending order by
        ``data``; ties are broken by ``col``, also in descending order.
    """
    tuples = zip(coo_matrix.col, coo_matrix.data)
    return sorted(tuples, key=lambda x: (x[1], x[0]), reverse=True)


def extract_topn_from_vector(feature_names, sorted_items, topn):
    """Map the first *topn* entries of *sorted_items* to their feature names.

    Args:
        feature_names: Sequence (or mapping) indexed by the first element of
            each pair in *sorted_items*.
        sorted_items: Sliceable sequence of ``(index, score)`` pairs, such as
            the list returned by ``sort_coo``.
        topn: Number of leading pairs to keep; used as a slice bound, so a
            value larger than ``len(sorted_items)`` keeps every pair.

    Returns:
        A dict ``{feature_names[index]: round(score, 3)}`` in the order of
        *sorted_items*. If the same feature name appears more than once, the
        score of its last occurrence is kept.
    """
    return {
        feature_names[idx]: round(score, 3)
        for idx, score in sorted_items[:topn]
    }


# Link to my repositories.
# NOTE(review): the link target itself is not present in this text.
# Link to the notebook used in the Hackapalooza workshop.
# NOTE(review): the link target itself is not present in this text.