Source code for soweego.importer.models.imdb_entity
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""`IMDb <https://www.imdb.com/>`_
`SQLAlchemy <https://www.sqlalchemy.org/>`_ ORM entities, based on
the datasets `specifications <https://www.imdb.com/interfaces/>`__.
_download page: https://datasets.imdbws.com/
"""
__author__ = 'Marco Fossati, Andrea Tupini'
__email__ = 'fossati@spaziodati.eu, tupini07@gmail.com'
__version__ = '1.0'
__license__ = 'GPL-3.0'
__copyright__ = 'Copyleft 2019, Hjfocs, tupini07'
from sqlalchemy import Boolean, Column, Integer, String, Text
from sqlalchemy.ext.declarative import declarative_base
from soweego.importer.models.base_entity import BaseEntity, BaseRelationship
from soweego.wikidata import vocabulary
BASE = declarative_base()
NAME_TABLE = 'imdb_name'
TITLE_TABLE = 'imdb_title'
ACTOR_TABLE = 'imdb_actor'
DIRECTOR_TABLE = 'imdb_director'
MUSICIAN_TABLE = 'imdb_musician'
PRODUCER_TABLE = 'imdb_producer'
WRITER_TABLE = 'imdb_writer'
TITLE_NAME_RELATIONSHIP_TABLE = 'imdb_title_name_relationship'
[docs]class IMDbNameEntity(BaseEntity):
"""An IMDb *name*: a person like an actor, director, producer, etc.
It comes from the ``name.basics.tsv.gz`` dataset.
See the `download page`_
All ORM entities describing IMDb people should inherit this class.
**Attributes**:
- **gender** (string(10)) - a gender
- **occupations** (string(255)) - a string list of Wikidata occupation QIDs
"""
# Each entity should be represented by its main occupation QID
# defined in `soweego.wikidata.vocabulary`
table_occupation = None
__tablename__ = NAME_TABLE
gender = Column(String(10))
occupations = Column(String(255), nullable=True)
# IMDb has only years, so override `BaseEntity`
# and set default year precisions
born_precision = Column(Integer, default=9, nullable=False)
died_precision = Column(Integer, default=9, nullable=False)
__abstract__ = True
[docs]class IMDbTitleEntity(BaseEntity):
"""An IMDb *title*: an audiovisual work like a movie, short,
TV series episode, etc.
It comes from the ``title.basics.tsv.gz`` dataset.
See the `download page`_
All ORM entities describing IMDb works should inherit this class.
**Attributes:**
- **title_type** (string(100)) - an audiovisual work type, like *movie*
or *short*
- **primary_title** (text) - the most popular title
- **original_title** (text) - a title in the original language
- **is_adult** (boolean) - whether the audiovisual work is for adults or not
- **runtime_minutes** (integer) - a runtime in minutes
- **genres** (string(255)) - a string list of audiovisual genres
"""
__tablename__ = TITLE_TABLE
__mapper_args__ = {'polymorphic_identity': __tablename__, 'concrete': True}
title_type = Column(String(100))
primary_title = Column(Text)
original_title = Column(Text)
is_adult = Column(Boolean)
runtime_minutes = Column(Integer)
genres = Column(String(255), nullable=True)
def __repr__(self) -> str:
return (
f'<IMDbTitleEntity(catalog_id="{self.catalog_id}", '
f'original_title="{self.original_title}")>'
)
[docs]class IMDbActorEntity(IMDbNameEntity):
"""An IMDb actor."""
table_occupation = vocabulary.ACTOR_QID
__tablename__ = ACTOR_TABLE
__mapper_args__ = {'polymorphic_identity': __tablename__, 'concrete': True}
[docs]class IMDbDirectorEntity(IMDbNameEntity):
"""An IMDb director."""
table_occupation = vocabulary.FILM_DIRECTOR_QID
__tablename__ = DIRECTOR_TABLE
__mapper_args__ = {'polymorphic_identity': __tablename__, 'concrete': True}
[docs]class IMDbMusicianEntity(IMDbNameEntity):
"""An IMDb musician."""
table_occupation = vocabulary.MUSICIAN_QID
__tablename__ = MUSICIAN_TABLE
__mapper_args__ = {'polymorphic_identity': __tablename__, 'concrete': True}
[docs]class IMDbProducerEntity(IMDbNameEntity):
"""An IMDb producer."""
table_occupation = vocabulary.FILM_PRODUCER_QID
__tablename__ = PRODUCER_TABLE
__mapper_args__ = {'polymorphic_identity': __tablename__, 'concrete': True}
[docs]class IMDbWriterEntity(IMDbNameEntity):
"""An IMDb writer."""
table_occupation = vocabulary.SCREENWRITER_QID
__tablename__ = WRITER_TABLE
__mapper_args__ = {'polymorphic_identity': __tablename__, 'concrete': True}
[docs]class IMDbTitleNameRelationship(BaseRelationship):
"""A relationship between an IMDb audiovisual work and an IMDb
person who took part in it."""
__tablename__ = TITLE_NAME_RELATIONSHIP_TABLE
__mapper_args__ = {'polymorphic_identity': __tablename__, 'concrete': True}
def __repr__(self):
return super().__repr__()