I have a web project hosted on a server. The frontend is Angular, the backend is Flask, and the database is MongoDB; each runs in its own Docker container, and the containers are linked to each other.
The problem arises when the backend tries to fetch data from MongoDB. The request fails with: "<IP address of my server>:27017: [Errno 104] Connection reset by peer"
The code is:
#Vectorization of text using TF-IDF and calculating similarity with the asked question's vector, with increased
#weights for technical terms
import gensim
import numpy as np
import re
import json
def get_keywords(ques, bug_id):
    """Extract the digit-bearing keywords from a question.

    Keywords are the whitespace-separated tokens of ``ques`` that contain at
    least one digit, e.g. release versions (``16.6.1``) and platform names
    (``CAT9k``). Tokens joined with ``/`` (``9300/9400``) are split into their
    parts. The result is lower-cased and never contains the bug ID itself.

    Args:
        ques: The question text.
        bug_id: The bug identifier; compared case-insensitively.

    Returns:
        A list of lower-case keywords: plain tokens first (in question order),
        followed by the parts of any slash-joined tokens.
    """
    plain_tokens = []
    slash_parts = []
    for token in ques.split():
        if re.search(r'\d', token) is None:
            continue
        token = token.strip('? ')
        if '/' in token:
            # Collect the parts in a separate list instead of removing from and
            # appending to the list being iterated, which skipped every token
            # that followed a slash-joined one.
            slash_parts.extend(part.strip(' ?') for part in token.split('/'))
        else:
            plain_tokens.append(token)

    bug = bug_id.lower()
    # Splitting "a/" leaves an empty part; an empty string is not a keyword.
    keywords = [word.lower() for word in plain_tokens + slash_parts if word]
    return [word for word in keywords if word != bug]
def _numeric_keywords(ques, bug_id):
    # Lower-cased tokens of ques that contain a digit, with slash-joined tokens
    # split into their parts and the bug ID (and empty parts) left out.
    plain_tokens = []
    slash_parts = []
    for token in ques.split():
        if re.search(r'\d', token) is None:
            continue
        token = token.strip('? ')
        if '/' in token:
            slash_parts.extend(part.strip(' ?') for part in token.split('/'))
        else:
            plain_tokens.append(token)
    bug = bug_id.lower()
    keywords = [word.lower() for word in plain_tokens + slash_parts if word]
    return [word for word in keywords if word != bug]


def get_numeric_similarity(ques, s, bug_id):
    """Score how well the technical keywords of ``ques`` are covered by ``s``.

    Keywords are the digit-bearing tokens of ``ques`` (release versions,
    platform names, ...) except the bug ID. Each keyword is tried against the
    stages below in order and is consumed by the first stage that matches it:

    1. the keyword is a substring of ``s``                          -> +1
    2. ``s`` has a wildcard version (``16.x`` / ``16.x.x``) whose leading
       field is contained in the keyword's leading field           -> +1
    3. the keyword is itself a wildcard version and ``s`` has a concrete
       version with the same leading field                         -> +0.5
    4. platform names agree once ``000`` is written as ``k``
       (``cat9000`` vs ``cat 9k``)                                 -> +1
    5. the digit run of the keyword is a substring of ``s``         -> +0.5

    Args:
        ques: The question being asked.
        s: A stored question to compare against (matched case-insensitively).
        bug_id: The bug identifier, excluded from the keywords.

    Returns:
        The accumulated score divided by the number of keywords, or ``0`` when
        the question has no keywords.
    """
    s = s.lower()
    num_words = _numeric_keywords(ques, bug_id)
    size = len(num_words)
    if size == 0:
        return 0

    cnt = 0
    wildcard = r'[0-9]{1,2}\.x\.x|[0-9]{1,2}\.x'

    # Stage 1: exact (substring) match.
    remaining = []
    for word in num_words:
        if word in s:
            cnt += 1
        else:
            remaining.append(word)

    # Stage 2: wildcard version in s covers the keyword. Each keyword is
    # counted once even when several wildcard tokens match it; queuing the
    # same keyword for removal twice used to raise ValueError.
    wildcard_majors = [token.split('.')[0] for token in s.split()
                       if re.search(wildcard, token)]
    unmatched = []
    for word in remaining:
        word_major = word.split('.')[0]
        if any(major in word_major for major in wildcard_majors):
            cnt += 1
        else:
            unmatched.append(word)
    remaining = unmatched

    # Stage 3: wildcard version in the question partially matches a concrete
    # version in s.
    versions = re.findall(r'[0-9]{1,2}\.[0-9]{1,2}\.{0,1}[0-9]{0,2}', s)
    version_majors = {version.split('.')[0] for version in versions}
    unmatched = []
    for word in remaining:
        if re.search(wildcard, word) and word.split('.')[0] in version_majors:
            cnt += 0.5
        else:
            unmatched.append(word)
    remaining = unmatched

    # Stage 4: platform names, treating "000" and "k" as the same suffix.
    if remaining:
        platforms = (re.findall(r'[a-z]{1,3}\s{0,1}[0-9]000', s)
                     + re.findall(r'[a-z]{1,3}\s{0,1}[0-9]k', s))
        platforms = {re.sub(r'000', 'k', "".join(name.split())) for name in platforms}
        # From here on the keywords stay in their "k" form, as stage 5 expects.
        remaining = [re.sub(r'000', 'k', word) for word in remaining]
        unmatched = []
        for word in remaining:
            if word in platforms:
                cnt += 1
            else:
                unmatched.append(word)
        remaining = unmatched

    # Stage 5: digits-only match.
    for word in remaining:
        digits = re.search(r'\d(.*)\d|\dk', word)
        if digits and digits.group(0) in s:
            cnt += 0.5

    # keyword similarity = (total match score) / (number of keywords asked)
    return cnt / size
def recommend_Questions(ques, question, answer, sim_th, bug_id, num_sim_wt, tfidf_sim_wt, location):
    """Recommend stored Q&A pairs that are similar to the asked question.

    The text is vectorised with a TF-IDF model whose vocabulary comes only from
    the questions passed in. Each stored question is scored with a weighted sum
    of the TF-IDF similarity and the keyword similarity from
    ``get_numeric_similarity``; pairs whose score reaches the threshold are
    returned.

    Args:
        ques: The question being asked.
        question: Mapping of ID -> iterable of entries, where ``entry[0]`` is a
            question text and ``entry[1] == 1`` flags a "sure shot" question.
            Only the first flagged entry of each ID is used.
        answer: Mapping of ID -> stored answer.
        sim_th: Minimum score, on a 0-100 scale, for a pair to be returned.
        bug_id: Bug identifier; only IDs containing it are returned.
        num_sim_wt: Weight (in percent) of the keyword similarity.
        tfidf_sim_wt: Weight (in percent) of the TF-IDF similarity.
        location: Passed to ``gensim.similarities.Similarity`` as its first
            argument.

    Returns:
        A list of ``[question_text, answer, score, id]`` sorted by descending
        score (0-100 scale). Empty when nothing qualifies.
    """
    def tokenize(text):
        # Lower-case the whitespace tokens, trim punctuation, drop empties.
        trimmed = (word.lower().strip("? .:\'\"") for word in text.split())
        return [word for word in trimmed if word != '']

    # putting sure shot questions into data list
    data = []
    ind = []
    for key, value in question.items():
        for ele in value:
            if ele[1] == 1:
                data.append(ele[0])
                ind.append(key)
                break

    if not data:
        # Nothing to index or score, so the result can only be empty; this
        # also avoids building a gensim index over an empty corpus.
        return []

    # generating corpora dict and the TF-IDF similarity index
    gen_docs = [tokenize(ele) for ele in data]
    dictionary = gensim.corpora.Dictionary(gen_docs)
    corpus = [dictionary.doc2bow(gen_doc) for gen_doc in gen_docs]
    tf_idf = gensim.models.TfidfModel(corpus)
    sims = gensim.similarities.Similarity(location, tf_idf[corpus],
                                          num_features=len(dictionary))

    # vectorise the asked question with the same dictionary and model
    query_doc_tf_idf = tf_idf[dictionary.doc2bow(tokenize(ques))]

    num_score = np.array([get_numeric_similarity(ques, ele, bug_id) for ele in data])
    # weighted average similarity
    total_score = ((num_sim_wt/100)*num_score) + ((tfidf_sim_wt/100)*sims[query_doc_tf_idf])

    res = []
    for i, sim in enumerate(total_score):
        if sim*100 >= sim_th and bug_id in ind[i]:
            res.append([data[i], answer[ind[i]], sim*100, ind[i]])
    res.sort(key=lambda x: x[2], reverse=True)
    return res
def recommend_Unanswered(ques, unanswered, sim_th, bug_id, num_sim_wt, tfidf_sim_wt, location):
    """Retrieve stored unanswered questions similar to the asked question.

    Scoring is the same weighted sum used for Q&A recommendation: TF-IDF
    similarity (vocabulary built only from the questions passed in) plus the
    keyword similarity from ``get_numeric_similarity``.

    Args:
        ques: The question being asked.
        unanswered: Mapping of ID -> sequence whose first element is the
            unanswered question text.
        sim_th: Minimum score, on a 0-100 scale, for a question to be returned.
        bug_id: Bug identifier; only IDs containing it are returned.
        num_sim_wt: Weight (in percent) of the keyword similarity.
        tfidf_sim_wt: Weight (in percent) of the TF-IDF similarity.
        location: Passed to ``gensim.similarities.Similarity`` as its first
            argument.

    Returns:
        A list of ``[question_text, score, id]`` sorted by descending score
        (0-100 scale). Empty when nothing qualifies.
    """
    def tokenize(text):
        # Lower-case the whitespace tokens, trim punctuation, drop empties.
        trimmed = (word.lower().strip("? .:\'\"") for word in text.split())
        return [word for word in trimmed if word != '']

    data = []
    ind = []
    for key, value in unanswered.items():
        data.append(value[0])
        ind.append(key)

    if not data:
        # Nothing to index or score, so the result can only be empty; this
        # also avoids building a gensim index over an empty corpus.
        return []

    # generating corpora dict and the TF-IDF similarity index
    gen_docs = [tokenize(ele) for ele in data]
    dictionary = gensim.corpora.Dictionary(gen_docs)
    corpus = [dictionary.doc2bow(gen_doc) for gen_doc in gen_docs]
    tf_idf = gensim.models.TfidfModel(corpus)
    sims = gensim.similarities.Similarity(location, tf_idf[corpus],
                                          num_features=len(dictionary))

    # vectorise the asked question with the same dictionary and model
    query_doc_tf_idf = tf_idf[dictionary.doc2bow(tokenize(ques))]

    num_score = np.array([get_numeric_similarity(ques, ele, bug_id) for ele in data])
    # weighted average similarity
    total_score = ((num_sim_wt/100)*num_score) + ((tfidf_sim_wt/100)*sims[query_doc_tf_idf])

    res = []
    for i, sim in enumerate(total_score):
        if sim*100 >= sim_th and bug_id in ind[i]:
            res.append([data[i], sim*100, ind[i]])
    res.sort(key=lambda x: x[1], reverse=True)
    return res
def NCE_query_similarity(mydb, query_doc, query_bug_id, location, q_sim_th, num_sim_wt, tfidf_sim_wt, uns_sim_th):
    """Look up stored content related to a question asked about one bug.

    Reads every document of the "questions" and "unanswered" collections of
    ``mydb``, keeps those whose ``_id`` contains ``query_bug_id``, and first
    tries to recommend answered questions; only if none qualify does it try
    the unanswered ones.

    Args:
        mydb: Object indexable by collection name whose items expose
            ``find()`` (presumably a pymongo database - verify against caller).
        query_doc: The question being asked.
        query_bug_id: Bug identifier used to filter document IDs.
        location: Forwarded to the recommenders for the similarity index.
        q_sim_th: Score threshold for answered questions.
        num_sim_wt: Weight (in percent) of the keyword similarity.
        tfidf_sim_wt: Weight (in percent) of the TF-IDF similarity.
        uns_sim_th: Score threshold for unanswered questions.

    Returns:
        ``["Related Q&As", [[counter, question, answer, score], ...]]``, or
        ``["Related Unanswered Query", [[counter, question, score], ...]]``
        (in which case the matched IDs are also written to
        ``temp_unans_id.json``), or the string
        ``"No matching Questions Found"``, or ``None`` when any exception was
        raised (the exception is printed).
    """
    try:
        unanswered_coll = mydb["unanswered"]
        questions_coll = mydb["questions"]
        question_docs = questions_coll.find()
        unanswered_docs = unanswered_coll.find()

        # answer: string or list look at it.
        questions_by_id = {}
        answers_by_id = {}
        for doc in question_docs:
            questions_by_id[doc['_id']] = doc['question']
            answers_by_id[doc['_id']] = doc['answer']
        unanswered_by_id = {}
        for doc in unanswered_docs:
            unanswered_by_id[doc['_id']] = doc['unanswered']

        # Retrieve similar Q&A pairs for the asked query.
        bug_questions = {doc_id: entries for doc_id, entries in questions_by_id.items()
                         if query_bug_id in doc_id}
        matches = []
        if len(bug_questions) > 0:
            matches = recommend_Questions(query_doc, bug_questions, answers_by_id, q_sim_th,
                                          query_bug_id, num_sim_wt, tfidf_sim_wt, location)
            print(matches)

        if len(matches) > 0:
            # rows = [[counter, qn, ans, sim], ...]
            rows = [[position, match[0], match[1][0], match[2]]
                    for position, match in enumerate(matches, start=1)]
            return ["Related Q&As", rows]

        # No related Q&A: check similarity with stored unanswered questions.
        bug_unanswered = {doc_id: entry for doc_id, entry in unanswered_by_id.items()
                          if query_bug_id in doc_id}
        if len(bug_unanswered) > 0:
            # each match = [unans, sim, unans_id]
            matches = recommend_Unanswered(query_doc, bug_unanswered, uns_sim_th,
                                           query_bug_id, num_sim_wt, tfidf_sim_wt, location)

        if len(matches) == 0:
            # Not similar to any stored Q&A or unanswered question ->
            # maybe a completely new question.
            return "No matching Questions Found"

        # Gentle reminder case: persist the IDs of the matched unanswered questions.
        matched_ids = [match[-1] for match in matches]
        with open("temp_unans_id.json", 'w') as fp:
            json.dump(matched_ids, fp)
        # rows = [[counter, unans, sim], ...]
        rows = [[position, match[0], match[1]]
                for position, match in enumerate(matches, start=1)]
        return ["Related Unanswered Query", rows]
    except Exception as e:
        print(e)
        return None
from Errno 104 - Connection reset by peer on docker
No comments:
Post a Comment