Tuesday, 15 September 2020

Errno 104 - Connection reset by peer on docker

I have a web project hosted on a server. The frontend is Angular, the backend is Flask, and the database is MongoDB; all of them run as Docker containers that are linked to each other.

The problem arises when the backend tries to fetch data from MongoDB. The error is reported as: "ip address of my server":27017: [Errno 104] Connection reset by peer

code is -

# Vectorization of text using TF-IDF, and calculation of similarity with the
# asked question's vector, with increased weights for technical terms.
import gensim
import numpy as np
import re
import json


def get_keywords(ques, bug_id):
    """Extract the digit-bearing keywords from a question.

    A keyword is any whitespace-separated token of ``ques`` that contains at
    least one digit, e.g. a version number (``16.6.1``) or a platform name
    (``CAT9k``). Tokens joined with ``/`` (``16.6.1/16.9.1``) are split into
    their individual parts. The result is useful for analytics.

    Args:
        ques: The question text.
        bug_id: The bug identifier; its first (case-insensitive) occurrence
            is removed from the result.

    Returns:
        A list of lower-cased keywords. Tokens without a slash come first in
        their original order, followed by the parts of slash-joined tokens.
    """
    plain, split_parts = [], []
    for token in ques.split():
        if re.search(r'\d', token) is None:
            continue
        token = token.strip('? ')
        if '/' in token:
            # Collected separately instead of removing/appending on the list
            # being iterated: the in-place version skipped the token that
            # followed every slash-joined token.
            split_parts.extend(part.strip(' ?') for part in token.split('/'))
        else:
            plain.append(token)

    # Empty fragments (from a leading, trailing or doubled '/') are dropped.
    keywords = [word.lower() for word in plain + split_parts if word]

    bug = bug_id.lower()
    if bug in keywords:
        keywords.remove(bug)
    return keywords


def get_numeric_similarity(ques, s, bug_id):
    """Score how well the digit-bearing keywords of ``ques`` are found in ``s``.

    Keywords are the tokens of ``ques`` that contain a digit (release
    versions, platform numbers, ...), lower-cased, with slash-joined tokens
    split apart and the bug ID removed. Each keyword is consumed by the first
    of the following stages that matches it:

    1. exact substring of ``s``                                  -> +1
    2. ``s`` mentions a wildcard release (``16.x``) whose leading
       segment occurs in the keyword's leading segment           -> +1
    3. the keyword is itself a wildcard release (``16.x``) and
       ``s`` mentions a concrete release with the same major     -> +0.5
    4. platform shorthand, ``000`` treated as ``k``
       (``cat9000`` vs ``cat 9k``)                               -> +1
    5. the keyword's digit run (or ``<digit>k``) occurs in ``s`` -> +0.5

    Args:
        ques: The question being asked.
        s: A stored question to compare against (compared lower-cased).
        bug_id: Bug identifier, excluded from the keywords.

    Returns:
        ``total score / number of keywords``, or ``0`` when ``ques`` has no
        keywords.
    """
    wildcard_re = r'[0-9]{1,2}\.x\.x|[0-9]{1,2}\.x'
    s = s.lower()

    # --- keyword extraction -------------------------------------------------
    # Slash-joined tokens are gathered separately rather than being removed
    # from / appended to the list under iteration, which skipped the token
    # following each slash-joined one.
    plain, split_parts = [], []
    for token in ques.split():
        if re.search(r'\d', token) is None:
            continue
        token = token.strip('? ')
        if '/' in token:
            split_parts.extend(part.strip(' ?') for part in token.split('/'))
        else:
            plain.append(token)
    keywords = [word.lower() for word in plain + split_parts if word]
    bug = bug_id.lower()
    if bug in keywords:
        keywords.remove(bug)

    size = len(keywords)
    if size == 0:
        return 0

    cnt = 0

    # --- stage 1: exact match (16.6.1 asked, 16.6.1 stored) -----------------
    remaining = []
    for word in keywords:
        if word in s:
            cnt += 1
        else:
            remaining.append(word)
    keywords = remaining

    # --- stage 2: stored text uses a wildcard release (16.x / 16.x.x) -------
    # Each keyword is counted at most once, however many wildcard tokens match
    # it; the previous per-token bookkeeping queued the same keyword for
    # removal repeatedly and raised ValueError on the second removal.
    # NOTE(review): the comparison is a substring test, as in the original
    # code, so a stored "6.x" also matches an asked "16.6.1" - confirm intent.
    wildcard_majors = [
        token.split('.')[0] for token in s.split() if re.search(wildcard_re, token)
    ]
    remaining = []
    for word in keywords:
        if any(major in word.split('.')[0] for major in wildcard_majors):
            cnt += 1
        else:
            remaining.append(word)
    keywords = remaining

    # --- stage 3: asked text uses a wildcard release (partial match) --------
    versions = re.findall(r'[0-9]{1,2}\.[0-9]{1,2}\.{0,1}[0-9]{0,2}', s)
    version_majors = {version.split('.')[0] for version in versions}
    remaining = []
    for word in keywords:
        if re.search(wildcard_re, word) and word.split('.')[0] in version_majors:
            cnt += 0.5
        else:
            remaining.append(word)
    keywords = remaining

    # --- stage 4: platform shorthand (cat9000 matches cat9k) ----------------
    if keywords:
        found = (re.findall(r'[a-z]{1,3}\s{0,1}[0-9]000', s)
                 + re.findall(r'[a-z]{1,3}\s{0,1}[0-9]k', s))
        platforms = {re.sub(r'000', 'k', "".join(item.split())) for item in found}
        # The normalised spelling is kept for the next stage as well.
        keywords = [re.sub(r'000', 'k', word) for word in keywords]
        remaining = []
        for word in keywords:
            if word in platforms:
                cnt += 1
            else:
                remaining.append(word)
        keywords = remaining

    # --- stage 5: digits-only match ------------------------------------------
    for word in keywords:
        digits = re.search(r'\d(.*)\d|\dk', word)
        if digits and digits.group(0) in s:
            cnt += 0.5

    # keyword similarity = (matches found) / (number of keywords asked)
    return cnt / size


def recommend_Questions(ques, question, answer, sim_th, bug_id, num_sim_wt, tfidf_sim_wt, location):
    """Recommend stored Q&A pairs that are similar to the asked question.

    The text is vectorised with a TF-IDF model whose vocabulary is built only
    from the candidate questions passed in. A candidate is recommended when
    the weighted sum of its TF-IDF similarity and its keyword ("numeric")
    similarity reaches the threshold.

    Args:
        ques: The question being asked.
        question: Mapping of question id -> iterable of entries, where
            ``entry[0]`` is the question text and ``entry[1] == 1`` marks a
            "sure shot" entry. Only the first such entry per id is used.
        answer: Mapping of question id -> answer, indexed with the same ids.
        sim_th: Threshold, compared against ``score * 100``.
        bug_id: Bug identifier; only ids containing it are returned.
        num_sim_wt: Weight (out of 100) of the keyword similarity.
        tfidf_sim_wt: Weight (out of 100) of the TF-IDF similarity.
        location: Passed to ``gensim.similarities.Similarity`` as its first
            argument (the output prefix for the index files).

    Returns:
        A list of ``[question_text, answer, score * 100, question_id]``
        sorted by descending score; empty when nothing qualifies.
    """
    def _tokenize(text):
        # Lower-case each whitespace token and trim surrounding punctuation.
        trimmed = (tok.lower().strip("? .:\'\"") for tok in text.split())
        return [tok for tok in trimmed if tok != '']

    # Collect the first "sure shot" question text for every id.
    data = []
    ind = []
    for key, value in question.items():
        for ele in value:
            if ele[1] == 1:
                data.append(ele[0])
                ind.append(key)
                break

    # Without candidates there is nothing to index or recommend; building
    # and querying a similarity index over an empty corpus is avoided.
    if not data:
        return []

    gen_docs = [_tokenize(text) for text in data]

    # Dictionary, bag-of-words corpus, TF-IDF model and similarity index.
    dictionary = gensim.corpora.Dictionary(gen_docs)
    corpus = [dictionary.doc2bow(gen_doc) for gen_doc in gen_docs]
    tf_idf = gensim.models.TfidfModel(corpus)
    sims = gensim.similarities.Similarity(location, tf_idf[corpus],
                                          num_features=len(dictionary))

    # Project the asked question into the same TF-IDF space.
    query_doc_tf_idf = tf_idf[dictionary.doc2bow(_tokenize(ques))]

    num_score = np.array(
        [get_numeric_similarity(ques, text, bug_id) for text in data]
    )
    # Weighted average similarity.
    total_score = (((num_sim_wt / 100) * num_score)
                   + ((tfidf_sim_wt / 100) * sims[query_doc_tf_idf]))

    res = [
        [data[i], answer[ind[i]], sim * 100, ind[i]]
        for i, sim in enumerate(total_score)
        if sim * 100 >= sim_th and bug_id in ind[i]
    ]
    res.sort(key=lambda row: row[2], reverse=True)
    return res

 def recommend_Unanswered(ques, unanswered, sim_th, bug_id, num_sim_wt, tfidf_sim_wt, location): 
#Retrieving most similar unanswered question (similar to Q&A recommendation)

data = []
ind = []

#putting sure shot questions into data list
for key, value in unanswered.items():
    data.append(value[0])
    ind.append(key)

#generate tokens from data
gen_docs = []
for ele in data:
    tokens = ele.split()
    tok = []
    for element in tokens:
        element = element.lower().strip("? .:\'\"")
        if element != '':
            tok.append(element)
    gen_docs.append(tok)

#generating corpora dict
dictionary = gensim.corpora.Dictionary(gen_docs)

corpus = [dictionary.doc2bow(gen_doc) for gen_doc in gen_docs]

tf_idf = gensim.models.TfidfModel(corpus)
sims = gensim.similarities.Similarity(location,tf_idf[corpus],
                                        num_features=len(dictionary))

#tokenize question asked by CE
tokens = ques.split()
query_doc = []
for element in tokens:
    element = element.lower().strip("? .:\'\"")
    if element != '':
        query_doc.append(element)

#update an existing dictionary and
query_doc_bow = dictionary.doc2bow(query_doc) 

# perform a similarity query against the corpus
query_doc_tf_idf = tf_idf[query_doc_bow]

num_score = []
for ele in data:
    num_score.append(get_numeric_similarity(ques, ele, bug_id))

num_score = np.array(num_score)

total_score = ((num_sim_wt/100)*num_score) + ((tfidf_sim_wt/100)*sims[query_doc_tf_idf]) #weighted average similarity

res = []   
for i, sim in enumerate(total_score):
    if sim*100 >= sim_th:
        if bug_id in ind[i]:
            res.append([data[i] ,sim*100, ind[i]])
        
res.sort(key = lambda x: x[1],reverse = True)
        
return res


def NCE_query_similarity(mydb, query_doc, query_bug_id, location, q_sim_th, num_sim_wt, tfidf_sim_wt, uns_sim_th):
    """Find stored Q&As, or else unanswered questions, similar to a query.

    Reads every document of the ``questions`` and ``unanswered`` collections
    of ``mydb``, keeps those whose ``_id`` contains ``query_bug_id`` and
    scores them against ``query_doc``.

    Args:
        mydb: Object indexable by collection name; each collection must
            provide ``find()`` yielding documents with ``_id`` plus
            ``question``/``answer`` or ``unanswered`` fields.
        query_doc: The question being asked.
        query_bug_id: Bug identifier used to select candidate documents.
            NOTE(review): matching is ``query_bug_id in _id``, so ``_id`` is
            assumed to be a string - confirm against the stored data.
        location: Forwarded to the recommenders (gensim index location).
        q_sim_th: Similarity threshold for answered questions.
        num_sim_wt: Weight (out of 100) of the keyword similarity.
        tfidf_sim_wt: Weight (out of 100) of the TF-IDF similarity.
        uns_sim_th: Similarity threshold for unanswered questions.

    Returns:
        * ``["Related Q&As", [[n, question, answer, score], ...]]`` when
          similar answered questions exist;
        * ``["Related Unanswered Query", [[n, question, score], ...]]`` when
          only similar unanswered questions exist (their ids are also written
          to ``temp_unans_id.json`` in the working directory);
        * ``"No matching Questions Found"`` when nothing is similar;
        * ``None`` when any exception occurs (the exception is printed).
    """
    try:
        unanswered_db = mydb["unanswered"]
        questions_db = mydb["questions"]

        ques = questions_db.find()
        unans = unanswered_db.find()

        # Keep only the documents that belong to the requested bug.
        question_bug = {}
        answer = {}
        for doc in ques:
            if query_bug_id in doc['_id']:
                question_bug[doc['_id']] = doc['question']
                answer[doc['_id']] = doc['answer']

        unanswered_bug = {}
        for doc in unans:
            if query_bug_id in doc['_id']:
                unanswered_bug[doc['_id']] = doc['unanswered']

        res = []
        if question_bug:
            # Retrieve similar Q&A pairs for the asked query.
            res = recommend_Questions(query_doc, question_bug, answer, q_sim_th,
                                      query_bug_id, num_sim_wt, tfidf_sim_wt, location)
        print(res)

        if res:
            # Each row is [counter, question, first answer entry, similarity].
            content_lis = [
                [counter, row[0], row[1][0], row[2]]
                for counter, row in enumerate(res, start=1)
            ]
            return ["Related Q&As", content_lis]

        # No similar Q&A: fall back to the stored unanswered questions.
        if unanswered_bug:
            # Each result row is [unanswered question, similarity, id].
            res = recommend_Unanswered(query_doc, unanswered_bug, uns_sim_th,
                                       query_bug_id, num_sim_wt, tfidf_sim_wt, location)
            if res:  # gentle-reminder case
                with open("temp_unans_id.json", 'w') as fp:
                    json.dump([row[-1] for row in res], fp)

                content_lis = [
                    [counter, row[0], row[1]]
                    for counter, row in enumerate(res, start=1)
                ]
                return ["Related Unanswered Query", content_lis]

        # Not similar to any stored Q&A or unanswered question - possibly a
        # completely new question.
        return "No matching Questions Found"
    except Exception as e:
        # Best-effort boundary: database/driver errors (e.g. a reset
        # connection) are reported and signalled to the caller as None.
        print(e)
        return None

            


from Errno 104 - Connection reset by peer on docker

No comments:

Post a Comment