NFOParser/nfoParser.py at main · Gykes/NFOParser · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
import os
import xml.etree.ElementTree as xml
import base64
import glob
import re
import requests
import config
import log
from abstractParser import AbstractParser

class NfoParser(AbstractParser):

    def __init__(self, scene_path, defaults=None, folder_mode=False):
        """Work out which nfo file (if any) belongs to ``scene_path``.

        Args:
            scene_path: Path of the scene file being processed.
            defaults: Optional fallback values; when truthy they replace
                ``self._defaults``.
            folder_mode: When true, the nfo is a "folder.nfo" located through
                ``self._find_in_parents`` starting at the scene's directory,
                instead of the nfo named after the scene file.
        """
        super().__init__()
        if defaults:
            self._defaults = defaults
        # Stays None unless the configured nfo location is "with files".
        # TODO: support dedicated dir instead of "with files" (compatibility with nfo exporters)
        self._nfo_file = None
        with_files = config.nfo_location.lower() == "with files"
        if with_files and folder_mode:
            # Look in the current dir & its parents for a folder.nfo file...
            scene_dir = os.path.dirname(scene_path)
            self._nfo_file = self._find_in_parents(scene_dir, "folder.nfo")
        elif with_files:
            # Same path as the scene file, with the extension swapped for ".nfo"
            stem, _ = os.path.splitext(scene_path)
            self._nfo_file = stem + ".nfo"
        # XML root element of the nfo; assigned by parse()
        self._nfo_root = None

    def __match_image_files(self, files, pattern):
        thumb_images = []
        index = 0
        for file in files:
            if index >= self._image_Max:
                break
            if pattern.match(file):
                with open(file, "rb") as img:
                    img_bytes = img.read()
                thumb_images.append(img_bytes)
                index += 1
        return thumb_images

    def __extract_nfo_uniqueid(self):
        return self._nfo_root.findtext("uniqueid")

    def __read_cover_image_file(self):
        path_no_ext = os.path.splitext(self._nfo_file)[0]
        file_no_ext = os.path.split(path_no_ext)[1]
        # First look for images for a given scene name...
        files = sorted(glob.glob(f"{glob.escape(path_no_ext)}*.*"))
        file_pattern = re.compile("^.*" + re.escape(file_no_ext) + \
            "(-landscape\\d{0,2}|-thumb\\d{0,2}|-poster\\d{0,2}|-cover\\d{0,2}|\\d{0,2})\\.(jpe?g|png|webp)$", re.I)
        result = self.__match_image_files(files, file_pattern)
        if result:
            return result
        # Not found? Look tor folder image...
        path_dir = os.path.dirname(self._nfo_file)
        folder_files = sorted(glob.glob(f"{glob.escape(path_dir)}{os.path.sep}*.*"))
        folder_pattern = re.compile("^.*(landscape\\d{0,2}|thumb\\d{0,2}|poster\\d{0,2}|cover\\d{0,2})\\.(jpe?g|png|webp)$", re.I)
        result = self.__match_image_files(folder_files, folder_pattern)
        return result

    def ___find_thumb_urls(self, query):
        result = []
        matches = self._nfo_root.findall(query)
        for match in matches:
            result.append(match.text)
        return result

    def __download_cover_images(self):
        # Prefer "landscape" images, then "poster", otherwise take any thumbnail image...
        thumb_urls = self.___find_thumb_urls("thumb[@aspect='landscape']") \
            or self.___find_thumb_urls("thumb[@aspect='poster']") \
            or self.___find_thumb_urls("thumb")
        # Ensure there are images and the count does not exceed the max allowed...
        if len(thumb_urls) == 0:
            return []
        del thumb_urls[self._image_Max:]
        # Download images from url
        thumb_images = []
        for thumb_url in thumb_urls:
            img_bytes = None
            try:
                r = requests.get(thumb_url, timeout=10)
                img_bytes = r.content
                thumb_images.append(img_bytes)
            except Exception as e:
                log.LogDebug(
                    f"Failed to download the cover image from {thumb_url}: {repr(e)}")
        return thumb_images

    def __extract_cover_images_b64(self):
        """Return the cover images of the scene as base64 data URIs.

        Images found on disk take precedence; the urls of the nfo <thumb> tags
        are only downloaded when no image file is found.

        Returns:
            list[str]: One "data:image/jpeg;base64,..." string per non-empty
            image; empty when "cover_image" is listed in ``config.blacklist``
            or when no image is available.
        """
        if "cover_image" in config.blacklist:
            return []
        # Get image from disk (file), otherwise from <thumb> tag (url)
        raw_images = self.__read_cover_image_file() or self.__download_cover_images()
        encoded_images = (base64.b64encode(raw_image) for raw_image in raw_images)
        # An empty image encodes to b"" and is left out
        return [
            f"data:image/jpeg;base64,{encoded.decode('utf-8')}"
            for encoded in encoded_images
            if encoded
        ]

    def __extract_nfo_rating(self):
        user_rating = round(float(self._nfo_root.findtext("userrating") or 0))
        if user_rating > 0:
            return user_rating
        # <rating> is converted to a scale of 5 if needed
        rating = None
        rating_elem = self._nfo_root.find("ratings/rating")
        if rating_elem is not None:
            max_value = float(rating_elem.attrib["max"] or 1)
            value = float(rating_elem.findtext("value") or 0)
            # ratings on scale 100 (since stashapp v24)
            rating = round(value / max_value * 100)
        return rating

    def __extract_nfo_date(self):
        # date either in <premiered> (full) or <year> (only the year)
        year = self._nfo_root.findtext("year")
        if year is not None:
            year = f"{year}-01-01"
        return self._nfo_root.findtext("premiered") or year

    def __extract_nfo_tags(self):
        file_tags = []
        # from nfo <tag>
        tags = self._nfo_root.findall("tag")
        for tag in tags:
            if tag.text:
                file_tags.append(tag.text)
        # from nfo <genre>
        genres = self._nfo_root.findall("genre")
        for genre in genres:
            if genre.text:
                file_tags.append(genre.text)
        return list(set(file_tags))

    def __extract_nfo_actors(self):
        file_actors = []
        actors = self._nfo_root.findall("actor/name")
        for actor in actors:
            if actor.text:
                file_actors.append(actor.text)
        return file_actors

    def parse(self):
        if not self._nfo_file or not os.path.exists(self._nfo_file):
            return {}
        log.LogDebug("Parsing '{}'".format(self._nfo_file))
        # Parse NFO xml content
        try:
            with open(self._nfo_file, mode="r", encoding="utf-8") as nfo:
                # Tolerance: strip non-standard whitespaces/new lines
                clean_nfo_content = nfo.read().strip()
            # Tolerance: replace illegal "&nbsp;"
            clean_nfo_content = clean_nfo_content.replace("&nbsp;", " ")
            self._nfo_root = xml.fromstring(clean_nfo_content)
        except Exception as e:
            log.LogError(
                f"Could not parse nfo '{self._nfo_file}': {repr(e)}")
            return {}
        # Extract data from XML tree. Spec: https://kodi.wiki/view/NFO_files/Movies
        b64_images = self.__extract_cover_images_b64()
        file_data = {
            # TODO: supports stash uniqueid to match to existing scenes (compatibility with nfo exporter)
            "file": self._nfo_file,
            "source": "nfo",
            "title": self._nfo_root.findtext("originaltitle") or self._nfo_root.findtext("title") \
            or self._nfo_root.findtext("sorttitle") or self._get_default("title", "re"),
            "director": self._nfo_root.findtext("director") or self._get_default("director"),
            "details": self._nfo_root.findtext("plot") or self._nfo_root.findtext("outline") \
            or self._nfo_root.findtext("tagline") or self._get_default("details"),
            "studio": self._nfo_root.findtext("studio") or self._get_default("studio"),
            "uniqueid": self.__extract_nfo_uniqueid(),
            "date": self.__extract_nfo_date() or self._get_default("date"),
            "actors": self.__extract_nfo_actors() or self._get_default("actors"),
            # Tags are merged with defaults
            "tags": list(set(self.__extract_nfo_tags() + self._get_default("tags"))),
            "rating": self.__extract_nfo_rating() or self._get_default("rating"),
            "cover_image": None if len(b64_images) < 1 else b64_images[0],
            "other_image": None if len(b64_images) < 2 else b64_images[1],
            # Below are NFO extensions or liberal tag interpretations (not part of the nfo spec)
            "movie": self._nfo_root.findtext("set/name") or self._get_default("title", "nfo"),
            "scene_index": self._nfo_root.findtext("set/index") or None,
            # TODO: read multiple URL tags into array
            "urls": None if not self._nfo_root.findtext("url") else [self._nfo_root.findtext("url")],

        }
        return file_data