-------------------------------------------------------------
NameError Traceback (most recent call last)
<ipython-input-30-e014e2ea6623> in <module>
-
from nltk.corpus import brown
----> 2 model = gensim.models.Word2Vec(brown.sents ()) NameError: name 'gensim' is not defined
from gensim.models import Word2Vec
from nltk.corpus import brown, movie_reviews, treebank b = Word2Vec(brown.sents())
b.vector_size()
In [31]:
-------------------------------------------------------------
TypeError Traceback (most recent call last)
<ipython-input-31-0536ea377d06> in <module>
from nltk.corpus import brown, movie_reviews, treebank
b = Word2Vec(brown.sents())
----> 4 b.vector_size()
TypeError: 'int' object is not callable
from nltk.corpus import brown
model = gensim.models.Word2Vec(brown.sents())
In [ ]:
model.save('brown.embedding')
new_model = gensim.models.Word2Vec.load('brown.embedding')
In [ ]:
In [18]:
import gensim
-------------------------------------------------------------
ModuleNotFoundError Traceback (most recent call last)
<ipython-input-18-e70e92d32c6e> in <module>
----> 1 import gensim
ModuleNotFoundError: No module named 'gensim'
In [ ]:
In [13]:
[['future', 'king'], ['future', 'prince'], [ 'king', 'prince'], ['king
['prince', 'king'], ['prince', 'future'], [ 'daughter', 'princess'],
['son', 'prince'], ['prince', 'son'], [ 'only', 'man'], ['only', 'can
['man', 'only'], ['man', 'king'], [ 'can', 'king'], ['can', 'man'], [
['king', 'only'], ['only', 'woman'], [ 'only', 'can'], ['only', 'quee
['woman', 'queen'], ['can', 'queen'], [ 'can', 'woman'], ['can', 'onl
['queen', 'only'], ['princess', 'queen'], [ 'queen', 'princess'], ['q
['queen', 'realm'], ['king', 'rule'], [ 'king', 'queen'], ['king', 'r
['rule', 'king'], ['rule', 'queen'], [ 'realm', 'rule'], ['realm', 'k
['prince', 'man'], ['strong', 'man'], [ 'strong', 'prince'], ['man',
['princess', 'beautiful'], ['princess', 'woman'], [ 'beautiful', 'wom
['woman', 'beautiful'], ['woman', 'princess'], [ 'royal', 'family'],
['family', 'king'], ['family', 'royal'], [ 'family', 'queen'], ['fami
['king', 'family'], ['king', 'their'], [ 'king', 'royal'], ['king', '
['queen', 'children'], ['queen', 'family'], [ 'queen', 'royal'], ['th
['their', 'king'], ['their', 'family'], [ 'children', 'their'], ['chi
['prince', 'only'], ['prince', 'boy'], [ 'prince', 'now'], ['only', '
['boy', 'now'], ['boy', 'only'], [ 'boy', 'prince'], ['now', 'boy'], [ 'boy', 'man'], ['man', 'boy']]
Out[13]: 1.0
model.get_weights()
In [17]:
Out[17]: [array([[ 1.2094697 , -1.7519492 , -0.58710396],
[-0.69742936, |
-0.8224115 , |
0.762515 |
], |
||
[-1.0232257 , |
-0.99347407, |
-0.6290375 |
], |
||
[-0.03148846, |
0.861094 |
, |
-1.2489003 |
], |
|
[ 0.5185555 , |
-1.0197939 |
, |
-1.1642642 |
], |
|
[ 0.34528542, |
0.4095648 |
, |
-1.4649962 |
], |
|
[-1.29276 , 0.80572814, -0.03398667], |
|||||
[ 0.40857503, |
0.04271423, |
-0.2268797 |
], |
||
[-0.80741733, |
0.8518701 , |
1.1676799 |
], |
||
[-1.5463994 , |
0.3928059 , |
0.8357471 |
], |
||
[-0.37743744, |
0.01945627, |
0.8162782 |
], |
||
[ 0.19316475, |
-0.22552669, |
1.1549882 |
], |
||
[ 0.99186873, -0.91034657, 0.60148233], |
|||||
[ 0.43138096, |
-0.24971391, |
-0.817542 |
], |
||
[-0.96682817, |
0.11788289, |
-1.4357525 |
], |
||
[-0.37768555, |
1.2125648 , |
-1.2955272 |
], |
||
[-0.24098323, |
0.6833078 , |
-1.21673 |
], |
||
[-0.72854114, |
0.79847366, |
0.57885855], |
|
||
[-0.8282897 , |
-0.26234597, |
1.4050348 ], |
|
||
[ 0.33650818, |
1.3968124 , |
-1.0523984 ], |
|
||
[ 0.08888734, |
-0.6616554 , |
0.04252516]], |
dtype=float32), |
||
array([-0.49878833, 0.2883682 , -0.38746664], dtype=float32), |
array([[ 8.6907446e-01, |
-6.6392583e-01, |
-4.6302193e-01, |
1.5042695e+ |
|
00, |
|
|
|
|
5.5304325e-01, |
5.6181812e-01, |
4.8718947e-01, |
-9.5064050e- |
|
01, |
|
|
|
|
-1.1298110e+00, |
-6.7061740e-01, |
-1.3256146e+00, |
-1.2928932e+ |
|
00, |
|
|
|
|
4.9400973e-01, |
-5.2003294e-01, |
6.8872941e-01, |
1.2350298e+ |
|
00, |
|
|
|
|
-3.3488446e-01, |
8.8095433e-01, |
1.2878020e-01, |
1.2034481e+ |
|
00, |
|
|
|
|
3.4371424e-01], |
||||
|
[-1.0867966e+00, |
9.0126878e-01, |
3.5752165e-01, |
7.8035134e- |
01, |
|
|
|
|
|
-1.1255006e+00, |
1.1815447e+00, |
-3.3572289e-01, |
9.2761588e- |
01, |
|
|
|
|
|
-1.2557857e+00, |
-1.1117984e+00, |
-7.5951183e-01, |
7.3709536e- |
01, |
|
|
|
|
|
-1.5203005e+00, |
6.7338985e-01, |
6.0285056e-01, |
2.7194273e- |
01, |
|
|
|
|
|
2.9193790e-04, |
-6.9741589e-01, |
1.1521496e+00, |
8.2621944e- |
01, |
|
|
|
|
|
-1.0671973e+00], |
|
|
|
|
[ 7.9353249e-01, |
1.0735989e+00, |
4.4674772e-01, |
-1.0618824e+ |
00, |
|
|
|
|
|
9.9174857e-01, |
-1.1751473e+00, |
3.7053224e-01, |
-8.0935717e- |
01, |
|
|
|
|
|
8.8709790e-01, |
1.1756316e+00, |
1.2205394e-01, |
6.3423496e- |
01, |
|
|
|
|
|
-1.0765996e+00, |
-9.5164514e-01, |
-9.4120181e-01, |
-1.3224217e+ |
00, |
|
|
|
|
|
-9.8728615e-01, |
1.2041377e+00, |
1.1767168e+00, |
-1.2862777e+ |
00, |
|
|
|
|
-2.0896141e-01]], dtype=float32),
array([-0.04525664, -0.25900874, 0.5708084 , -0.37810788, -0.472737
55,
-0.68486977, -0.17615764, 0.2708971 , 0.23307966, -0.144220
04, |
|
|||||
|
0.43081766, |
0.04142636, |
-0.13674738, |
0.4116926 |
, |
-0.335976 |
87, |
|
|
|
|
|
|
|
-0.48062968, |
-0.5119752 , |
-0.3982598 , |
-0.3310347 |
, |
-0.422799 |
3 , |
|
|
|
|
|
|
0.74141794], dtype=float32)]
In [14]:
Out[14]:
weights
array([[ 1.2094697 , -1.7519492 , -0.58710396],
[-0.69742936, |
-0.8224115 , |
0.762515 |
], |
|
[-1.0232257 , |
-0.99347407, |
-0.6290375 |
], |
|
[-0.03148846, |
0.861094 , |
-1.2489003 |
], |
|
[ 0.5185555 , |
-1.0197939 , |
-1.1642642 |
], |
|
[ 0.34528542, |
0.4095648 , |
-1.4649962 |
], |
|
[-1.29276 , 0.80572814, -0.03398667], |
||||
[ 0.40857503, |
0.04271423, |
-0.2268797 |
], |
|
[-0.80741733, |
0.8518701 , |
1.1676799 |
], |
|
[-1.5463994 , |
0.3928059 , |
0.8357471 |
], |
|
[-0.37743744, |
0.01945627, |
0.8162782 |
], |
|
[ 0.19316475, |
-0.22552669, |
1.1549882 |
], |
|
[ 0.99186873, -0.91034657, 0.60148233], |
||||
[ 0.43138096, |
-0.24971391, |
-0.817542 |
], |
|
[-0.96682817, |
0.11788289, |
-1.4357525 |
], |
|
[-0.37768555, |
1.2125648 , |
-1.2955272 |
], |
|
[-0.24098323, |
0.6833078 , |
-1.21673 |
], |
|
|
[-0.72854114, |
0.79847366, |
0.57885855], |
|
|
[-0.8282897 , |
-0.26234597, |
1.4050348 ], |
|
|
[ 0.33650818, |
1.3968124 , |
-1.0523984 ], |
|
|
[ 0.08888734, |
-0.6616554 , |
0.04252516]], |
dtype=float32) |
In [ ]: |
|
|
|
|
# Reading the text from the input folder texts = pd.read_csv('input/sample.csv') texts = [x for x in texts['text']] texts
In [4]:
Out[4]: ['The future king is the prince', 'Daughter is the princess ', 'Son is the prince',
'Only a man can be a king ', 'Only a woman can be a queen', 'The princess will be a queen', 'Queen and king rule the realm', 'The prince is a strong man',
'The princess is a beautiful woman ',
'The royal family is the king and queen and their children', 'Prince is only a boy now',
'A boy will be a man']
# Defining the window for context
window = 2
# Creating a placeholder for the scanning of the word list
word_lists = [] all_text = []
for text in texts:
# Cleaning the text
text = text_preprocessing(text) print(text)
# Appending to the all text list
all_text += text
# Creating a context dictionary for i, word in enumerate(text): for w in range (window):
# Getting the context that is ahead by *window* words
if i + 1 + w < len(text):
word_lists.append([word] + [text[(i + 1 + w)]])
# Getting the context that is behind by *window* words
if i - w - 1 >= 0:
word_lists.append([word] + [text[(i - w - 1)]]) unique_word_dict = create_unique_word_dict(all_text)
# Defining the number of features (unique words)
n_words = len(unique_word_dict)
# Getting all the unique words
words = list(unique_word_dict.keys())
# Creating the X and Y matrices using one hot encoding
X = []
Y = []
In [9]:
['future', 'king', 'prince'] ['daughter', 'princess']
['son', 'prince']
['only', 'man', 'can', 'king']
['only', 'woman', 'can', 'queen'] ['princess', 'queen']
['queen', 'king', 'rule', 'realm']
['prince', 'strong', 'man']
['princess', 'beautiful', 'woman']
['royal', 'family', 'king', 'queen', 'their', 'children'] ['prince', 'only', 'boy', 'now']
['boy', 'man']
{'beautiful': 0, 'boy': 1, 'can': 2, 'children': 3, 'daughter': 4, 'f
amily': 5, 'future': 6, 'king': 7, 'man': 8, 'now': 9, 'only': 10, 'p
rince': 11, 'princess': 12, 'queen': 13, 'realm': 14, 'royal': 15, 'r
ule': 16, 'son': 17, 'strong': 18, 'their': 19, 'woman': 20}
In [10]:
Out[10]:
unique_word_dict.keys()
dict_keys(['beautiful', 'boy', 'can', 'children', 'daughter', 'famil y', 'future', 'king', 'man', 'now', 'only', 'prince', 'princess', 'qu
een', 'realm', 'royal', 'rule', 'son', 'strong', 'their', 'woman'])
word_lists
In [7]:
Out[7]: [['future', 'king'],
['future', 'prince'],
['king', 'prince'],
['king', 'future'],
['prince', 'king'],
['prince', 'future'],
['daughter', 'princess'],
['princess', 'daughter'],
['son', 'prince'],
['prince', 'son'],
['only', 'man'],
['only', 'can'],
['only', 'king'],
['man', 'can'],
['man', 'only'],
['man', 'king'],
['can', 'king'],
['can', 'man'],
['can', 'only'],
['king', 'can'],
['king', 'man'],
['king', 'only'],
['only', 'woman'],
['only', 'can'],
['only', 'queen'],
['woman', 'can'],
['woman', 'only'],
['woman', 'queen'],
['can', 'queen'],
['can', 'woman'],
['can', 'only'],
['queen', 'can'],
['queen', 'woman'],
['queen', 'only'],
['princess', 'queen'],
['queen', 'princess'],
['queen', 'king'],
['queen', 'rule'],
['queen', 'realm'],
['king', 'rule'],
['king', 'queen'],
['king', 'realm'],
['rule', 'realm'],
['rule', 'king'],
['rule', 'queen'],
['realm', 'rule'],
['realm', 'king'],
['realm', 'queen'],
['prince', 'strong'],
['prince', 'man'],
['strong', 'man'],
['strong', 'prince'],
['man', 'strong'],
['man', 'prince'],
['princess', 'beautiful'],
['princess', 'woman'],
['beautiful', 'woman'],
['beautiful', 'princess'],
['woman', 'beautiful'],
['woman', 'princess'],
['royal', 'family'],
['royal', 'king'],
['royal', 'queen'],
['royal', 'their'],
['royal', 'children'],
['family', 'king'],
['family', 'royal'],
['family', 'queen'],
['family', 'their'],
['family', 'children'],
['king', 'queen'],
['king', 'family'],
['king', 'their'],
['king', 'royal'],
['king', 'children'],
['queen', 'their'],
['queen', 'king'],
['queen', 'children'],
['queen', 'family'],
['queen', 'royal'],
['their', 'children'],
['their', 'queen'],
['their', 'king'],
['their', 'family'],
['their', 'royal'],
['children', 'their'],
['children', 'queen'],
['children', 'king'],
['children', 'family'],
['children', 'royal'],
['prince', 'only'],
['prince', 'boy'],
['prince', 'now'],
['only', 'boy'],
['only', 'prince'],
['only', 'now'],
['boy', 'now'],
['boy', 'only'],
['boy', 'prince'],
['now', 'boy'],
['now', 'only'],
['now', 'prince'],
['boy', 'man'],
['man', 'boy']]
In [21]:
for i, word_list in tqdm(enumerate(word_lists)):
# Getting the indices
main_word_index = unique_word_dict.get(word_list[0]) print( "main_word_index",main_word_index) context_word_index = unique_word_dict.get(word_list[1]) print("context_word_index",context_word_index)
# Creating the placeholders X_row = np.zeros(n_words) Y_row = np.zeros(n_words)
# One hot encoding the main word
X_row[main_word_index] = 1
# One hot encoding the Y matrix words
Y_row[context_word_index] = 1
# Appending to the main matrices
X.append(X_row) Y.append(Y_row)
# Converting the matrices into a sparse format because the vast majori
X = sparse.csr_matrix(X) Y = sparse.csr_matrix(Y) X
0it [00:00, ?it/s]
main_word_index 6
context_word_index 7
-------------------------------------------------------------
AttributeError Traceback (most recent call last)
<ipython-input-21-177ceb271a2a> in <module> 17
18 # Appending to the main matrices
---> 19 X.append(X_row)
20 Y.append(Y_row)
21
c:\users\win10\anaconda3\envs\nlp_projects\lib\site-packages\scipy\sp
arse\base.py in getattr (self, attr)
685 return self.getnnz()
686 else:
--> 687 raise AttributeError(attr + " not found")
688
689 def transpose(self, axes=None, copy= False): AttributeError: append not found
n_words
In [17]:
Out[17]: 21
X_row = np.zeros(n_words) X_row
In [18]:
Out[18]: |
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., |
0., |
0., |
0., |
0., |
0., |
0., |
0., |
|
0., 0., 0., 0.]) |
|
|
|
|
|
|
|
In [19]: |
X.toarray() |
|
|||||
Out[19]: |
array([[0., |
0., |
0., |
..., |
0., |
0., |
0.], |
|
[0., |
0., |
0., |
..., |
0., |
0., |
0.], |
|
[0., |
0., |
0., |
..., |
0., |
0., |
0.], |
|
..., |
|
|
|
|
|
|
|
[0., |
0., |
0., |
..., |
0., |
0., |
0.], |
|
[0., |
1., |
0., |
..., |
0., |
0., |
0.], |
|
[0., |
0., |
0., |
..., |
0., |
0., |
0.]]) |
In [20]: |
Y.toarray() |
|
|
|
|
|
|
Out[20]: |
array([[0., |
0., |
0., |
..., |
0., |
0., |
0.], |
|
[0., |
0., |
0., |
..., |
0., |
0., |
0.], |
|
[0., |
0., |
0., |
..., |
0., |
0., |
0.], |
|
..., |
|
|
|
|
|
|
|
[0., |
0., |
0., |
..., |
0., |
0., |
0.], |
|
[0., |
0., |
0., |
..., |
0., |
0., |
0.], |
|
[0., |
1., |
0., |
..., |
0., |
0., |
0.]]) |
weights
In [9]:
Out[9]: array([[ 0.48290667, 0.8457001 , 0.59346896, 1.2645154 , -1.204026
9 ,
8 ,
9 ,
4 ,
8 ,
86,
,
07,
84,
44,
32,
5 ,
56,
7 ,
8 ,
8 ,
4 ,
23,
65,
1.1573222 ],
[ 0.73837304, -0.20747106, |
0.4157775 , |
-1.1825156 |
, |
1.383047 |
||
-0.05894116], [-0.5129488 , -0.6455666 , |
0.22340234, |
-0.2326535 |
, |
0.684774 |
||
0.5590865 |
], |
|
|
|
|
|
[-0.6766587 |
, |
-0.7023404 , |
-1.1072837 , |
0.59157366, |
-0.806497 |
|
-0.5534737 |
], |
|
|
|
|
|
[-0.8892283 |
, |
0.96314925, |
-0.19587861, |
1.0056496 , |
-1.003864 |
|
1.3950247 |
], |
|
|
|
|
|
[-0.5153948 |
, |
-1.0223849 , |
-1.188952 , |
0.27847248, |
-0.771121 |
|
-0.8921992 |
], |
|
|
|
|
|
[ 1.2034631 |
, |
-1.1523337 , |
-1.4931266 , |
0.46258503, |
0.99452 |
|
-1.2687634 |
], |
|
|
|
|
|
[-1.1228006 |
, |
0.60329014, |
-0.13347368, |
-1.2324024 , |
-0.129033 |
|
-0.8778439 |
], |
|
|
|
|
|
[ 1.0579053 , -1.3727962 |
, |
0.79554003, |
0.38659805, |
0.412905 |
||
-0.20482758], [ 1.0717387 , -0.9128558 |
, |
0.52586603, |
-0.40760133, |
0.899142 |
||
0.43862754], [ 0.58207464, -0.9047044 |
, |
0.25423622, |
-0.38664833, |
-0.130590 |
0.1812161 ],
[ 0.476674 , 0.12483631, 0.95619524, -0.06299967, 1.047062
-1.2895753 ],
[ 0.97549105, 1.1289161 , -0.54957724, -0.94887674, -0.874517
1.1080424 ],
[-1.0478702 , 0.29631495, 0.6564594 , 0.7506916 , -0.289654
-0.3990863 ],
[-0.7447832 , 0.7770811 , -0.7349353 , 0.92941946, 1.260282
0.8298819 ],
[-1.2512238 , -0.39256236, -0.9883344 , 0.23578405, -0.303579
-0.8775306 ],
[-0.36981118, 0.97570264, -1.281236 , 0.8374807 , 1.132740
-1.0228343 , |
-0.83191454, |
-0.9423956 |
, |
0.943447 |
, -1.1111679 , -0.12512505, -1.1358008 , 0.846277 |
-0.23229918],
[ 0.97142696,
-0.8568555 ]
[ 0.47624534,
-1.3412069 ],
6 ,
06,
[-0.5601015 , -0.36370665, -0.7968366 , 0.6025755 , -0.544403
-0.82952034],
[-0.6789324 , 0.42493168, 0.19000857, -1.203751 , -0.868954
1.2139951 ]], dtype=float32)
import tensorflow as tf tf. version
In [1]:
Out[1]: '2.0.0'
!pip install tensorflow==2.0.0
In [ ]: