'''
Write Python code to extract the title, h1 and h2 tag data of a web page
(scraping), and apply a topic modeling function to the extracted strings.
'''
import requests
from bs4 import BeautifulSoup
from gensim.corpora import Dictionary
from gensim.models import LdaModel

url = "https://www.example.com"  # replace with the URL of the web page you want to scrape
response = requests.get(url)
response.raise_for_status()  # fail early if the request was unsuccessful
soup = BeautifulSoup(response.content, "html.parser")

# Extract the title, h1 and h2 tags.
# get_text() is used instead of .string, which returns None when a tag has nested children.
title = soup.title.get_text(strip=True) if soup.title else ""
h1_tags = [tag.get_text(strip=True) for tag in soup.find_all("h1")]
h2_tags = [tag.get_text(strip=True) for tag in soup.find_all("h2")]

# Topic modeling: collect all the extracted strings, dropping empty ones.
texts = [text for text in h1_tags + h2_tags + [title] if text]

# Create a dictionary mapping each word to an integer id.
tokenized = [text.lower().split() for text in texts]
word_dict = Dictionary(tokenized)

# Convert each string to its bag-of-words representation.
corpus = [word_dict.doc2bow(tokens) for tokens in tokenized]

# Train the LDA model.
num_topics = 5  # change the number of topics as needed
lda_model = LdaModel(corpus, num_topics=num_topics, id2word=word_dict)

# Print the topics.
for i in range(num_topics):
    print(f"Topic {i + 1}: {lda_model.print_topic(i)}\n")
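
# Optional follow-up (a minimal sketch, not part of the original script): map each
# extracted string back to its most likely topic using gensim's get_document_topics().
# The output formatting here is an illustrative choice, not the only way to present it.
for text, bow in zip(texts, corpus):
    doc_topics = lda_model.get_document_topics(bow)
    if doc_topics:
        # Pick the topic with the highest probability for this string.
        best_topic, probability = max(doc_topics, key=lambda pair: pair[1])
        print(f"'{text}' -> topic {best_topic + 1} (p={probability:.2f})")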