1212use OCP \ICache ;
1313use OCP \ICacheFactory ;
1414use Psr \Log \LoggerInterface ;
15+ use Throwable ;
1516
1617class PodcastDataReader {
1718 private ?ICache $ cache = null ;
@@ -67,22 +68,22 @@ public function fetchPodcastData(string $url, string $userId): ?PodcastData {
6768 }
6869
/**
 * Resolve podcast metadata for a single URL.
 *
 * When an ARD Audiothek program id can be extracted from the URL, the ARD
 * Audiothek lookup is attempted first. Any Throwable raised by that lookup
 * is logged as a warning and execution falls through to the generic path,
 * which fetches the URL and parses the response body as RSS XML.
 *
 * Errors raised on the generic path (fetching or parsing) are not caught here.
 *
 * @param string $url URL used for id extraction and, on the generic path, fetched directly
 * @return PodcastData metadata from the ARD lookup, or from the parsed RSS body
 */
private function fetchPodcastDataForUrl(string $url): PodcastData {
    $ardProgramId = $this->extractArdProgramId($url);
    if ($ardProgramId !== null) {
        try {
            return $this->fetchArdAudiothekData($ardProgramId, $url);
        } catch (Throwable $e) {
            // Deliberate best-effort: a failed ARD lookup must not prevent
            // the plain RSS attempt below, so the error is only logged.
            $this->logger->warning('Failed to resolve ARD Audiothek metadata, falling back to RSS parsing.', [
                'url' => $url,
                'exception' => $e,
            ]);
        }
    }
    $resp = $this->fetchUrl($url);
    return PodcastData::parseRssXml($resp->getBody());
}
7685
77- private function isArdAudiothekUrl (string $ url ): bool {
78- return (bool )preg_match (self ::ARD_PROGRAMSET_REGEX , $ url );
79- }
80-
81- private function fetchArdAudiothekData (string $ url ): PodcastData {
82- $ programId = $ this ->extractArdProgramId ($ url );
83- if ($ programId === null ) {
84- throw new \InvalidArgumentException ('Could not extract ARD Audiothek program id from URL ' );
85- }
86+ private function fetchArdAudiothekData (string $ programId , string $ originalUrl ): PodcastData {
8687 $ resp = $ this ->fetchUrl ("https:// " . self ::ARD_AUDIOTHEK_HOST . "/programsets/ $ programId " );
8788 $ body = $ resp ->getBody ();
8889 $ decoded = json_decode ($ body , true );
@@ -98,7 +99,7 @@ private function fetchArdAudiothekData(string $url): PodcastData {
9899 return new PodcastData (
99100 $ programSet ['title ' ] ?? null ,
100101 $ programSet ['publicationService ' ]['title ' ] ?? null ,
101- $ programSet ['sharingUrl ' ] ?? $ url ,
102+ $ programSet ['sharingUrl ' ] ?? $ originalUrl ,
102103 $ programSet ['synopsis ' ] ?? ($ programSet ['description ' ] ?? null ),
103104 $ this ->resolveArdImageUrl ($ programSet ['image ' ] ?? null ),
104105 (new DateTime ())->getTimestamp ()
@@ -116,7 +117,32 @@ private function extractArdProgramId(string $url): ?string {
116117 if (preg_match (self ::ARD_PROGRAMSET_REGEX , $ url , $ matches )) {
117118 return $ matches ['id ' ];
118119 }
119- return null ;
120+ return $ this ->extractArdProgramIdFromWebsite ($ url );
121+ }
122+
/**
 * Try to read a program id from an ardaudiothek.de website URL.
 *
 * The URL is accepted only when all of the following hold:
 *  - it can be parsed by parse_url();
 *  - its host is "ardaudiothek.de" or "www.ardaudiothek.de" (case-insensitive);
 *  - its path has at least three non-empty segments;
 *  - the first segment is "sendung" or "podcast" (case-insensitive);
 *  - the last segment consists solely of decimal digits.
 *
 * @param string $url candidate website URL
 * @return string|null the trailing numeric path segment, or null when any check fails
 */
private function extractArdProgramIdFromWebsite(string $url): ?string {
    $parts = parse_url($url);
    if ($parts === false) {
        return null;
    }
    $host = strtolower($parts['host'] ?? '');
    if (!in_array($host, ['ardaudiothek.de', 'www.ardaudiothek.de'], true)) {
        return null;
    }
    $path = $parts['path'] ?? '';
    if ($path === '') {
        return null;
    }
    // Drop the empty pieces produced by leading, trailing or doubled slashes
    // and reindex so positional access below is reliable.
    $segments = array_values(array_filter(explode('/', $path), 'strlen'));
    if (count($segments) < 3) {
        return null;
    }
    $section = strtolower($segments[0]);
    if (!in_array($section, ['sendung', 'podcast'], true)) {
        return null;
    }
    $possibleId = $segments[count($segments) - 1];
    return ctype_digit($possibleId) ? $possibleId : null;
}
121147
122148 private function tryFetchImageBlob (PodcastData $ data ): ?string {
0 commit comments