Source | Description | Function | Link |
---|---|---|---|
Mydramalist | A website with info on all the kdramas including rank, actors, descriptions, and more (from 2022 and earlier) | Webscraped each kdrama from the site, reading the JSON file and cleaning the data to produce a dataframe | mydramalist |
Kaggle | A website holding a netflix dataset | Converted dataset into a dataframe to use for the project | kaggle |
Dataworld | A website holding a netflix dataset | Converted dataset into a dataframe to use for the project | dataworld |
def keys_exists(element, *keys):
    '''
    Check if *keys (nested) exists in `element` (dict).

    Starting from `element`, each key in `keys` is used to index into the
    value reached so far, e.g. keys_exists(d, 'a', 'b') checks d['a']['b'].

    Returns True when every lookup succeeds, False as soon as one fails.
    Raises AttributeError when `element` is not a dict or no keys are given.
    '''
    if not isinstance(element, dict):
        raise AttributeError('keys_exists() expects dict as first argument.')
    if not keys:
        raise AttributeError('keys_exists() expects at least two arguments, one given.')

    current = element
    for key in keys:
        try:
            current = current[key]
        except (KeyError, IndexError, TypeError):
            # KeyError: key missing from a dict.
            # IndexError: index out of range on a nested list.
            # TypeError: the value reached cannot be indexed with this key
            # (e.g. None, a number, or a str indexed by a str), so the
            # nested path does not exist either.
            return False
    return True
def get_data(soup):
    '''gets data of kdrama from the json file

    Looks up the <script type="application/ld+json"> element via
    `soup.find` and parses its text as JSON.
    (assumes `soup` is a BeautifulSoup-like object - confirm against caller)

    Returns the parsed JSON value, or False when the script element cannot
    be found or its text is not valid JSON.
    '''
    # this is the json file with kdrama info
    try:
        raw = soup.find('script', type='application/ld+json').text
    except AttributeError:
        # find() gave back None (no such tag), or soup itself has no find()
        return False
    try:
        return json.loads(raw)
    except json.JSONDecodeError:
        # malformed JSON is reported the same way as a missing tag
        return False
def get_details(soup):
    '''gets the detail list items of a kdrama page

    Runs a fixed CSS selector through `soup.select` and returns whatever
    that call returns.
    '''
    # path to the <li> entries inside the second box of the title container
    details_selector = "div > div.container-fluid.title-container > div > div.col-lg-4.col-md-4 > div > div:nth-child(2) > div.box-body.light-b > ul > li"
    matches = soup.select(details_selector)
    return matches