001package net.filebot.format; 002 003import static java.nio.charset.StandardCharsets.*; 004import static java.util.Arrays.*; 005import static java.util.Collections.*; 006import static java.util.stream.Collectors.*; 007import static net.filebot.CachedResource.*; 008import static net.filebot.Settings.*; 009import static net.filebot.media.XattrMetaInfo.*; 010import static net.filebot.similarity.Normalization.*; 011import static net.filebot.util.FileUtilities.*; 012import static net.filebot.util.RegularExpressions.*; 013import static net.filebot.util.ZipUtilities.*; 014 015import java.io.File; 016import java.net.URI; 017import java.nio.ByteBuffer; 018import java.util.AbstractList; 019import java.util.AbstractMap; 020import java.util.LinkedHashMap; 021import java.util.List; 022import java.util.Map; 023import java.util.RandomAccess; 024import java.util.Set; 025import java.util.function.Function; 026import java.util.regex.Pattern; 027 028import org.jsoup.Jsoup; 029 030import groovy.json.JsonSlurper; 031import groovy.xml.XmlSlurper; 032 033import net.filebot.Cache; 034import net.filebot.CacheType; 035import net.filebot.InvalidInputException; 036import net.filebot.MemoryCache; 037import net.filebot.Resource; 038import net.filebot.platform.mac.MacAppUtilities; 039 040public abstract class DataResource { 041 042 public abstract Object getResource(); 043 044 public abstract boolean isStale(); 045 046 public abstract byte[] bytes() throws Exception; 047 048 public Map<Object, Object> csv() throws Exception { 049 Map<Object, Object> value = new LinkedHashMap<Object, Object>(); 050 051 // read CSV file 052 List<Pattern> delimiter = asList(TAB, EQUALS, SEMICOLON, PIPE, COLON, COMMA); 053 054 for (String line : lines()) { 055 if (line.startsWith("#")) { 056 continue; 057 } 058 059 for (Pattern d : delimiter) { 060 String[] field = d.split(line, 2); 061 if (field.length >= 2) { 062 // add key=value match 063 value.put(field[0], field[1]); 064 // lock delimiter 065 delimiter = singletonList(d); 066 break; 067 } 068 } 069 } 070 071 return value; 072 } 073 074 public String text() throws Exception { 075 ByteBuffer data = ByteBuffer.wrap(bytes()); 076 077 // users passing Microsoft Excel spreadsheet files instead of plain/text CSV files is surprisingly common 078 if (isZipFile(data)) { 079 throw new InvalidInputException(this + " is a ZIP archive and not a plain/text file"); 080 } 081 082 return decodeTextContent(data, true, UTF_8); 083 } 084 085 public List<String> lines() throws Exception { 086 return asList(NEWLINE.split(text())); 087 } 088 089 public Object xml() throws Exception { 090 return new XmlSlurper().parseText(text()); 091 } 092 093 public Object json() throws Exception { 094 return new JsonSlurper().parseText(text()); 095 } 096 097 public Object html() throws Exception { 098 return Jsoup.parse(text()); 099 } 100 101 public static DataResource local(File file) throws Exception { 102 return getDataResource(file, f -> { 103 if (isMacSandbox()) { 104 MacAppUtilities.askUnlockFolders(null, singleton(file)); 105 } 106 return new Memoized(new Local(f)); 107 }); 108 } 109 110 public static DataResource remote(URI uri) throws Exception { 111 return getDataResource(uri, f -> { 112 return new Memoized(new Remote(f)); 113 }); 114 } 115 116 private static <T> DataResource getDataResource(T resource, Function<T, DataResource> type) throws Exception { 117 DataResource data = cache.getIfPresent(resource); 118 if (data == null || data.isStale()) { 119 data = type.apply(resource); 120 cache.put(resource, data); 121 } 122 return data; 123 } 124 125 private static final MemoryCache<Object, DataResource> cache = MemoryCache.forMinutes(); 126 127 private static class Local extends DataResource { 128 129 private final File file; 130 private final boolean directory; 131 private final long lastModified; 132 133 public Local(File file) { 134 this.file = file; 135 this.directory = file.isDirectory(); 136 this.lastModified = file.lastModified(); 137 } 138 139 @Override 140 public Object getResource() { 141 return file; 142 } 143 144 @Override 145 public boolean isStale() { 146 return this.lastModified != file.lastModified(); 147 } 148 149 @Override 150 public byte[] bytes() throws Exception { 151 if (!file.isFile()) { 152 throw new InvalidInputException("File not found: " + file); 153 } 154 if (file.length() > ONE_GIGABYTE) { 155 throw new InvalidInputException("File is too large and probably not a plain/text file: " + file); 156 } 157 return readFile(file); 158 } 159 160 @Override 161 public Map<Object, Object> csv() throws Exception { 162 if (directory) { 163 return getMediaIndex(); 164 } 165 return super.csv(); 166 } 167 168 @Override 169 public List<String> lines() throws Exception { 170 if (directory) { 171 return getDirectoryIndex(); 172 } 173 return super.lines(); 174 } 175 176 public List<String> getDirectoryIndex() { 177 return asList(file.list()); 178 } 179 180 public Map<Object, Object> getMediaIndex() { 181 Map<Object, Object> value = new LinkedHashMap<Object, Object>(); 182 // read xattr file structure 183 for (File f : listFiles(file, NOT_HIDDEN)) { 184 Object m = xattr.getMetaInfo(f); 185 if (m != null) { 186 value.put(f, m); 187 } 188 } 189 return value; 190 } 191 } 192 193 private static class Remote extends DataResource { 194 195 private final URI url; 196 197 public Remote(URI url) { 198 this.url = url; 199 } 200 201 @Override 202 public Object getResource() { 203 return url; 204 } 205 206 @Override 207 public boolean isStale() { 208 return false; 209 } 210 211 @Override 212 public byte[] bytes() throws Exception { 213 return Cache.getConcurrentCache(Cache.URL, CacheType.Monthly).url(url.toURL()).get(); 214 } 215 } 216 217 public static class Post extends DataResource { 218 219 private final URI url; 220 private final String postData; 221 private final String contentType; 222 private final Map<String, String> requestHeader; 223 224 public Post(URI url, String postData, String contentType, Map<String, String> requestHeader) { 225 this.url = url; 226 this.postData = postData; 227 this.contentType = contentType; 228 this.requestHeader = requestHeader; 229 } 230 231 @Override 232 public Object getResource() { 233 return url; 234 } 235 236 @Override 237 public boolean isStale() { 238 return false; 239 } 240 241 @Override 242 public byte[] bytes() throws Exception { 243 return Cache.getConcurrentCache(Cache.URL, CacheType.Monthly).bytes(url + " " + postData, k -> url.toURL()).fetch(post(() -> { 244 return postData.getBytes(UTF_8); 245 }, () -> { 246 return contentType; 247 }, () -> { 248 return requestHeader; 249 })).get(); 250 } 251 } 252 253 private static class Memoized extends DataResource { 254 255 private final DataResource resource; 256 257 private final Resource<Map<Object, Object>> csv; 258 private final Resource<List<String>> lines; 259 260 private final Resource<String> text; 261 private final Resource<Object> xml; 262 private final Resource<Object> json; 263 private final Resource<Object> html; 264 265 public Memoized(DataResource resource) { 266 this.resource = resource; 267 268 this.csv = Resource.lazy(() -> new LookupMap(resource.csv())); 269 this.lines = Resource.lazy(() -> new LookupList(resource.lines())); 270 271 this.text = Resource.lazy(resource::text); 272 this.xml = Resource.lazy(resource::xml); 273 this.json = Resource.lazy(resource::json); 274 this.html = Resource.lazy(resource::html); 275 } 276 277 @Override 278 public Object getResource() { 279 return resource.getResource(); 280 } 281 282 @Override 283 public boolean isStale() { 284 return resource.isStale(); 285 } 286 287 @Override 288 public byte[] bytes() throws Exception { 289 return resource.bytes(); 290 } 291 292 @Override 293 public Map<Object, Object> csv() throws Exception { 294 return csv.get(); 295 } 296 297 @Override 298 public List<String> lines() throws Exception { 299 return lines.get(); 300 } 301 302 @Override 303 public String text() throws Exception { 304 return text.get(); 305 } 306 307 @Override 308 public Object xml() throws Exception { 309 return xml.get(); 310 } 311 312 @Override 313 public Object json() throws Exception { 314 return json.get(); 315 } 316 317 @Override 318 public Object html() throws Exception { 319 return html.get(); 320 } 321 } 322 323 protected static class LookupMap extends AbstractMap<Object, Object> { 324 325 private final Map<Object, Object> values; 326 327 public LookupMap(Map<Object, Object> values) { 328 this.values = values; 329 } 330 331 private String definingKey(Object key) { 332 // letters and digits are defining, everything else will be ignored 333 return normalizePunctuation(key.toString()).toLowerCase(); 334 } 335 336 private Map<String, Object> lookup; 337 338 private Map<String, Object> getLookup() { 339 if (lookup == null) { 340 lookup = values.entrySet().stream().collect(toMap(e -> definingKey(e.getKey()), e -> e.getValue(), (a, b) -> a, LinkedHashMap::new)); 341 } 342 return lookup; 343 } 344 345 @Override 346 public Object get(Object key) { 347 return getLookup().get(definingKey(key)); 348 } 349 350 @Override 351 public boolean containsKey(Object key) { 352 return getLookup().containsKey(definingKey(key)); 353 } 354 355 public Set<Object> keySet() { 356 return unmodifiableSet(getLookup().keySet()); 357 } 358 359 @Override 360 public Set<Entry<Object, Object>> entrySet() { 361 return unmodifiableSet(values.entrySet()); 362 } 363 } 364 365 protected static class LookupList extends AbstractList<String> implements RandomAccess { 366 367 private final List<String> values; 368 369 public LookupList(List<String> values) { 370 this.values = values; 371 } 372 373 @Override 374 public String get(int index) { 375 return values.get(index); 376 } 377 378 @Override 379 public int size() { 380 return values.size(); 381 } 382 383 private String definingKey(Object key) { 384 // letters and digits are defining, everything else will be ignored 385 return normalizePunctuation(key.toString()).toLowerCase(); 386 } 387 388 private Set<String> lookup; 389 390 private Set<String> getLookup() { 391 if (lookup == null) { 392 lookup = values.stream().map(this::definingKey).collect(toSet()); 393 } 394 return lookup; 395 } 396 397 public Set<String> keySet() { 398 return unmodifiableSet(getLookup()); 399 } 400 401 @Override 402 public boolean contains(Object object) { 403 if (object == null) { 404 return false; 405 } 406 return getLookup().contains(definingKey(object)); 407 } 408 } 409 410}