Python : Day 9 – Lesson 9



-------------------------------------------------------------

NameError Traceback (most recent call last)

<ipython-input-30-e014e2ea6623> in <module>

  1. from nltk.corpus import brown

    ----> 2 model = gensim.models.Word2Vec(brown.sents ()) NameError: name 'gensim' is not defined


    from gensim.models import Word2Vec

    from nltk.corpus import brown, movie_reviews, treebank b = Word2Vec(brown.sents())

    b.vector_size()

    In [31]:



-------------------------------------------------------------

TypeError Traceback (most recent call last)

<ipython-input-31-0536ea377d06> in <module>

  • from nltk.corpus import brown, movie_reviews, treebank

  • b = Word2Vec(brown.sents())

  • ----> 4 b.vector_size()


    TypeError: 'int' object is not callable


    from nltk.corpus import brown

    model = gensim.models.Word2Vec(brown.sents())

    In [ ]:


    model.save('brown.embedding')

    new_model = gensim.models.Word2Vec.load('brown.embedding')

    In [ ]:


    In [18]:

    import gensim

    -------------------------------------------------------------

    ModuleNotFoundError Traceback (most recent call last)

    <ipython-input-18-e70e92d32c6e> in <module>

    ----> 1 import gensim


    ModuleNotFoundError: No module named 'gensim'


    In [ ]:


    In [13]:

    [['future', 'king'], ['future', 'prince'], [ 'king', 'prince'], ['king

    ['prince', 'king'], ['prince', 'future'], [ 'daughter', 'princess'],

    ['son', 'prince'], ['prince', 'son'], [ 'only', 'man'], ['only', 'can

    ['man', 'only'], ['man', 'king'], [ 'can', 'king'], ['can', 'man'], [

    ['king', 'only'], ['only', 'woman'], [ 'only', 'can'], ['only', 'quee

    ['woman', 'queen'], ['can', 'queen'], [ 'can', 'woman'], ['can', 'onl

    ['queen', 'only'], ['princess', 'queen'], [ 'queen', 'princess'], ['q

    ['queen', 'realm'], ['king', 'rule'], [ 'king', 'queen'], ['king', 'r

    ['rule', 'king'], ['rule', 'queen'], [ 'realm', 'rule'], ['realm', 'k

    ['prince', 'man'], ['strong', 'man'], [ 'strong', 'prince'], ['man',

    ['princess', 'beautiful'], ['princess', 'woman'], [ 'beautiful', 'wom

    ['woman', 'beautiful'], ['woman', 'princess'], [ 'royal', 'family'],

    ['family', 'king'], ['family', 'royal'], [ 'family', 'queen'], ['fami

    ['king', 'family'], ['king', 'their'], [ 'king', 'royal'], ['king', '

    ['queen', 'children'], ['queen', 'family'], [ 'queen', 'royal'], ['th

    ['their', 'king'], ['their', 'family'], [ 'children', 'their'], ['chi

    ['prince', 'only'], ['prince', 'boy'], [ 'prince', 'now'], ['only', '

    ['boy', 'now'], ['boy', 'only'], [ 'boy', 'prince'], ['now', 'boy'], [ 'boy', 'man'], ['man', 'boy']]


    Out[13]: 1.0


    model.get_weights()

    In [17]:

    Out[17]: [array([[ 1.2094697 , -1.7519492 , -0.58710396],

    [-0.69742936,

    -0.8224115 ,

    0.762515

    ],

    [-1.0232257 ,

    -0.99347407,

    -0.6290375

    ],

    [-0.03148846,

    0.861094

    ,

    -1.2489003

    ],

    [ 0.5185555 ,

    -1.0197939

    ,

    -1.1642642

    ],

    [ 0.34528542,

    0.4095648

    ,

    -1.4649962

    ],

    [-1.29276 , 0.80572814, -0.03398667],

    [ 0.40857503,

    0.04271423,

    -0.2268797

    ],

    [-0.80741733,

    0.8518701 ,

    1.1676799

    ],

    [-1.5463994 ,

    0.3928059 ,

    0.8357471

    ],

    [-0.37743744,

    0.01945627,

    0.8162782

    ],

    [ 0.19316475,

    -0.22552669,

    1.1549882

    ],

    [ 0.99186873, -0.91034657, 0.60148233],

    [ 0.43138096,

    -0.24971391,

    -0.817542

    ],

    [-0.96682817,

    0.11788289,

    -1.4357525

    ],

    [-0.37768555,

    1.2125648 ,

    -1.2955272

    ],

    [-0.24098323,

    0.6833078 ,

    -1.21673

    ],

    [-0.72854114,

    0.79847366,

    0.57885855],


    [-0.8282897 ,

    -0.26234597,

    1.4050348 ],


    [ 0.33650818,

    1.3968124 ,

    -1.0523984 ],


    [ 0.08888734,

    -0.6616554 ,

    0.04252516]],

    dtype=float32),

    array([-0.49878833, 0.2883682 , -0.38746664], dtype=float32),


    array([[ 8.6907446e-01,

    -6.6392583e-01,

    -4.6302193e-01,

    1.5042695e+

    00,




    5.5304325e-01,

    5.6181812e-01,

    4.8718947e-01,

    -9.5064050e-

    01,




    -1.1298110e+00,

    -6.7061740e-01,

    -1.3256146e+00,

    -1.2928932e+

    00,




    4.9400973e-01,

    -5.2003294e-01,

    6.8872941e-01,

    1.2350298e+

    00,




    -3.3488446e-01,

    8.8095433e-01,

    1.2878020e-01,

    1.2034481e+

    00,




    3.4371424e-01],


    [-1.0867966e+00,

    9.0126878e-01,

    3.5752165e-01,

    7.8035134e-

    01,






    -1.1255006e+00,

    1.1815447e+00,

    -3.3572289e-01,

    9.2761588e-

    01,






    -1.2557857e+00,

    -1.1117984e+00,

    -7.5951183e-01,

    7.3709536e-

    01,






    -1.5203005e+00,

    6.7338985e-01,

    6.0285056e-01,

    2.7194273e-

    01,






    2.9193790e-04,

    -6.9741589e-01,

    1.1521496e+00,

    8.2621944e-

    01,






    -1.0671973e+00],





    [ 7.9353249e-01,

    1.0735989e+00,

    4.4674772e-01,

    -1.0618824e+

    00,






    9.9174857e-01,

    -1.1751473e+00,

    3.7053224e-01,

    -8.0935717e-

    01,






    8.8709790e-01,

    1.1756316e+00,

    1.2205394e-01,

    6.3423496e-

    01,






    -1.0765996e+00,

    -9.5164514e-01,

    -9.4120181e-01,

    -1.3224217e+

    00,






    -9.8728615e-01,

    1.2041377e+00,

    1.1767168e+00,

    -1.2862777e+

    00,





    -2.0896141e-01]], dtype=float32),

    array([-0.04525664, -0.25900874, 0.5708084 , -0.37810788, -0.472737

    55,

    -0.68486977, -0.17615764, 0.2708971 , 0.23307966, -0.144220

    04,



    0.43081766,

    0.04142636,

    -0.13674738,

    0.4116926

    ,

    -0.335976

    87,








    -0.48062968,

    -0.5119752 ,

    -0.3982598 ,

    -0.3310347

    ,

    -0.422799

    3 ,







    0.74141794], dtype=float32)]


    In [14]:

    Out[14]:

    weights

    array([[ 1.2094697 , -1.7519492 , -0.58710396],

    [-0.69742936,

    -0.8224115 ,

    0.762515

    ],

    [-1.0232257 ,

    -0.99347407,

    -0.6290375

    ],

    [-0.03148846,

    0.861094 ,

    -1.2489003

    ],

    [ 0.5185555 ,

    -1.0197939 ,

    -1.1642642

    ],

    [ 0.34528542,

    0.4095648 ,

    -1.4649962

    ],

    [-1.29276 , 0.80572814, -0.03398667],

    [ 0.40857503,

    0.04271423,

    -0.2268797

    ],

    [-0.80741733,

    0.8518701 ,

    1.1676799

    ],

    [-1.5463994 ,

    0.3928059 ,

    0.8357471

    ],

    [-0.37743744,

    0.01945627,

    0.8162782

    ],

    [ 0.19316475,

    -0.22552669,

    1.1549882

    ],

    [ 0.99186873, -0.91034657, 0.60148233],

    [ 0.43138096,

    -0.24971391,

    -0.817542

    ],

    [-0.96682817,

    0.11788289,

    -1.4357525

    ],

    [-0.37768555,

    1.2125648 ,

    -1.2955272

    ],

    [-0.24098323,

    0.6833078 ,

    -1.21673

    ],


    [-0.72854114,

    0.79847366,

    0.57885855],



    [-0.8282897 ,

    -0.26234597,

    1.4050348 ],



    [ 0.33650818,

    1.3968124 ,

    -1.0523984 ],



    [ 0.08888734,

    -0.6616554 ,

    0.04252516]],

    dtype=float32)

    In [ ]:






    # Reading the text from the input folder texts = pd.read_csv('input/sample.csv') texts = [x for x in texts['text']] texts

    In [4]:



    Out[4]: ['The future king is the prince', 'Daughter is the princess ', 'Son is the prince',

    'Only a man can be a king ', 'Only a woman can be a queen', 'The princess will be a queen', 'Queen and king rule the realm', 'The prince is a strong man',

    'The princess is a beautiful woman ',

    'The royal family is the king and queen and their children', 'Prince is only a boy now',

    'A boy will be a man']


    # Defining the window for context

    window = 2


    # Creating a placeholder for the scanning of the word list

    word_lists = [] all_text = []


    for text in texts:


    # Cleaning the text

    text = text_preprocessing(text) print(text)


    # Appending to the all text list

    all_text += text


    # Creating a context dictionary for i, word in enumerate(text): for w in range (window):

    # Getting the context that is ahead by *window* words

    if i + 1 + w < len(text):

    word_lists.append([word] + [text[(i + 1 + w)]])

    # Getting the context that is behind by *window* words

    if i - w - 1 >= 0:

    word_lists.append([word] + [text[(i - w - 1)]]) unique_word_dict = create_unique_word_dict(all_text)

    # Defining the number of features (unique words)

    n_words = len(unique_word_dict)


    # Getting all the unique words

    words = list(unique_word_dict.keys())


    # Creating the X and Y matrices using one hot encoding

    X = []

    Y = []

    In [9]:


    ['future', 'king', 'prince'] ['daughter', 'princess']

    ['son', 'prince']

    ['only', 'man', 'can', 'king']

    ['only', 'woman', 'can', 'queen'] ['princess', 'queen']

    ['queen', 'king', 'rule', 'realm']

    ['prince', 'strong', 'man']

    ['princess', 'beautiful', 'woman']

    ['royal', 'family', 'king', 'queen', 'their', 'children'] ['prince', 'only', 'boy', 'now']

    ['boy', 'man']

    {'beautiful': 0, 'boy': 1, 'can': 2, 'children': 3, 'daughter': 4, 'f

    amily': 5, 'future': 6, 'king': 7, 'man': 8, 'now': 9, 'only': 10, 'p

    rince': 11, 'princess': 12, 'queen': 13, 'realm': 14, 'royal': 15, 'r

    ule': 16, 'son': 17, 'strong': 18, 'their': 19, 'woman': 20}


    In [10]:

    Out[10]:

    unique_word_dict.keys()

    dict_keys(['beautiful', 'boy', 'can', 'children', 'daughter', 'famil y', 'future', 'king', 'man', 'now', 'only', 'prince', 'princess', 'qu

    een', 'realm', 'royal', 'rule', 'son', 'strong', 'their', 'woman'])


    word_lists

    In [7]:

    Out[7]: [['future', 'king'],

    ['future', 'prince'],

    ['king', 'prince'],

    ['king', 'future'],

    ['prince', 'king'],

    ['prince', 'future'],

    ['daughter', 'princess'],

    ['princess', 'daughter'],

    ['son', 'prince'],

    ['prince', 'son'],

    ['only', 'man'],

    ['only', 'can'],

    ['only', 'king'],

    ['man', 'can'],

    ['man', 'only'],

    ['man', 'king'],

    ['can', 'king'],

    ['can', 'man'],

    ['can', 'only'],

    ['king', 'can'],

    ['king', 'man'],

    ['king', 'only'],

    ['only', 'woman'],

    ['only', 'can'],

    ['only', 'queen'],

    ['woman', 'can'],

    ['woman', 'only'],

    ['woman', 'queen'],

    ['can', 'queen'],

    ['can', 'woman'],

    ['can', 'only'],

    ['queen', 'can'],

    ['queen', 'woman'],

    ['queen', 'only'],

    ['princess', 'queen'],

    ['queen', 'princess'],

    ['queen', 'king'],

    ['queen', 'rule'],

    ['queen', 'realm'],

    ['king', 'rule'],

    ['king', 'queen'],

    ['king', 'realm'],

    ['rule', 'realm'],

    ['rule', 'king'],

    ['rule', 'queen'],

    ['realm', 'rule'],

    ['realm', 'king'],

    ['realm', 'queen'],

    ['prince', 'strong'],

    ['prince', 'man'],

    ['strong', 'man'],

    ['strong', 'prince'],

    ['man', 'strong'],

    ['man', 'prince'],

    ['princess', 'beautiful'],

    ['princess', 'woman'],

    ['beautiful', 'woman'],

    ['beautiful', 'princess'],

    ['woman', 'beautiful'],

    ['woman', 'princess'],

    ['royal', 'family'],

    ['royal', 'king'],

    ['royal', 'queen'],

    ['royal', 'their'],

    ['royal', 'children'],

    ['family', 'king'],

    ['family', 'royal'],

    ['family', 'queen'],

    ['family', 'their'],

    ['family', 'children'],

    ['king', 'queen'],

    ['king', 'family'],

    ['king', 'their'],

    ['king', 'royal'],

    ['king', 'children'],

    ['queen', 'their'],

    ['queen', 'king'],

    ['queen', 'children'],

    ['queen', 'family'],

    ['queen', 'royal'],

    ['their', 'children'],

    ['their', 'queen'],

    ['their', 'king'],

    ['their', 'family'],

    ['their', 'royal'],

    ['children', 'their'],

    ['children', 'queen'],

    ['children', 'king'],

    ['children', 'family'],

    ['children', 'royal'],

    ['prince', 'only'],

    ['prince', 'boy'],

    ['prince', 'now'],

    ['only', 'boy'],

    ['only', 'prince'],

    ['only', 'now'],

    ['boy', 'now'],

    ['boy', 'only'],

    ['boy', 'prince'],

    ['now', 'boy'],

    ['now', 'only'],

    ['now', 'prince'],

    ['boy', 'man'],

    ['man', 'boy']]


    In [21]:

    for i, word_list in tqdm(enumerate(word_lists)):

    # Getting the indices

    main_word_index = unique_word_dict.get(word_list[0]) print( "main_word_index",main_word_index) context_word_index = unique_word_dict.get(word_list[1]) print("context_word_index",context_word_index)


    # Creating the placeholders X_row = np.zeros(n_words) Y_row = np.zeros(n_words)


    # One hot encoding the main word

    X_row[main_word_index] = 1


    # One hot encoding the Y matrix words

    Y_row[context_word_index] = 1


    # Appending to the main matrices

    X.append(X_row) Y.append(Y_row)


    # Converting the matrices into a sparse format because the vast majori

    X = sparse.csr_matrix(X) Y = sparse.csr_matrix(Y) X

    0it [00:00, ?it/s]

    main_word_index 6

    context_word_index 7


    -------------------------------------------------------------

    AttributeError Traceback (most recent call last)

    <ipython-input-21-177ceb271a2a> in <module> 17

    18 # Appending to the main matrices

    ---> 19 X.append(X_row)

    20 Y.append(Y_row)

    21


    c:\users\win10\anaconda3\envs\nlp_projects\lib\site-packages\scipy\sp

    arse\base.py in   getattr (self, attr)

    685 return self.getnnz()

    686 else:

    --> 687 raise AttributeError(attr + " not found")

    688

    689 def transpose(self, axes=None, copy= False): AttributeError: append not found


    n_words

    In [17]:

    Out[17]: 21


    X_row = np.zeros(n_words) X_row

    In [18]:


    Out[18]:

    array([0., 0., 0., 0., 0., 0., 0., 0.,

    0., 0.,

    0.,

    0.,

    0.,

    0.,

    0.,

    0.,

    0.,


    0., 0., 0., 0.])









    In [19]:

    X.toarray()


    Out[19]:

    array([[0.,

    0.,

    0.,

    ...,

    0.,

    0.,

    0.],


    [0.,

    0.,

    0.,

    ...,

    0.,

    0.,

    0.],


    [0.,

    0.,

    0.,

    ...,

    0.,

    0.,

    0.],


    ...,








    [0.,

    0.,

    0.,

    ...,

    0.,

    0.,

    0.],


    [0.,

    1.,

    0.,

    ...,

    0.,

    0.,

    0.],


    [0.,

    0.,

    0.,

    ...,

    0.,

    0.,

    0.]])

    In [20]:

    Y.toarray()







    Out[20]:

    array([[0.,

    0.,

    0.,

    ...,

    0.,

    0.,

    0.],


    [0.,

    0.,

    0.,

    ...,

    0.,

    0.,

    0.],


    [0.,

    0.,

    0.,

    ...,

    0.,

    0.,

    0.],


    ...,








    [0.,

    0.,

    0.,

    ...,

    0.,

    0.,

    0.],


    [0.,

    0.,

    0.,

    ...,

    0.,

    0.,

    0.],


    [0.,

    1.,

    0.,

    ...,

    0.,

    0.,

    0.]])


    weights

    In [9]:

    Out[9]: array([[ 0.48290667, 0.8457001 , 0.59346896, 1.2645154 , -1.204026

    9 ,


    8 ,


    9 ,


    4 ,


    8 ,


    86,


    ,


    07,


    84,


    44,


    32,


    5 ,


    56,


    7 ,


    8 ,


    8 ,


    4 ,


    23,


    65,

    1.1573222 ],

    [ 0.73837304, -0.20747106,

    0.4157775 ,

    -1.1825156

    ,

    1.383047

    -0.05894116],

    [-0.5129488 , -0.6455666 ,


    0.22340234,


    -0.2326535


    ,


    0.684774


    0.5590865


    ],





    [-0.6766587

    ,

    -0.7023404 ,

    -1.1072837 ,

    0.59157366,

    -0.806497

    -0.5534737

    ],





    [-0.8892283

    ,

    0.96314925,

    -0.19587861,

    1.0056496 ,

    -1.003864

    1.3950247

    ],





    [-0.5153948

    ,

    -1.0223849 ,

    -1.188952 ,

    0.27847248,

    -0.771121

    -0.8921992

    ],





    [ 1.2034631

    ,

    -1.1523337 ,

    -1.4931266 ,

    0.46258503,

    0.99452

    -1.2687634

    ],





    [-1.1228006

    ,

    0.60329014,

    -0.13347368,

    -1.2324024 ,

    -0.129033

    -0.8778439

    ],





    [ 1.0579053 , -1.3727962

    ,

    0.79554003,

    0.38659805,

    0.412905

    -0.20482758],

    [ 1.0717387 , -0.9128558


    ,


    0.52586603,


    -0.40760133,


    0.899142

    0.43862754],

    [ 0.58207464, -0.9047044


    ,


    0.25423622,


    -0.38664833,


    -0.130590


    0.1812161 ],

    [ 0.476674 , 0.12483631, 0.95619524, -0.06299967, 1.047062


    -1.2895753 ],

    [ 0.97549105, 1.1289161 , -0.54957724, -0.94887674, -0.874517


    1.1080424 ],

    [-1.0478702 , 0.29631495, 0.6564594 , 0.7506916 , -0.289654


    -0.3990863 ],

    [-0.7447832 , 0.7770811 , -0.7349353 , 0.92941946, 1.260282


    0.8298819 ],

    [-1.2512238 , -0.39256236, -0.9883344 , 0.23578405, -0.303579


    -0.8775306 ],

    [-0.36981118, 0.97570264, -1.281236 , 0.8374807 , 1.132740


    -1.0228343 ,

    -0.83191454,

    -0.9423956

    ,

    0.943447

    ,

    -1.1111679 , -0.12512505, -1.1358008 , 0.846277

    -0.23229918],

    [ 0.97142696,


    -0.8568555 ]

    [ 0.47624534,


    -1.3412069 ],


    6 ,


    06,

    [-0.5601015 , -0.36370665, -0.7968366 , 0.6025755 , -0.544403


    -0.82952034],

    [-0.6789324 , 0.42493168, 0.19000857, -1.203751 , -0.868954


    1.2139951 ]], dtype=float32)


    import tensorflow as tf tf. version

    In [1]:


    Out[1]: '2.0.0'


    !pip install tensorflow==2.0.0

    In [ ]: