I'm using regex to parse some HTML, with the re library imported.
No errors are thrown; it just doesn't return anything. When I run the same Python in Eclipse (Python 2.6, I think), the regex parses the data as I'd expect. Is there any reason why it wouldn't work in Plex? Please, no comments on the excellent quality of my regex ;)
Here's the function I'm using, where SCHEDULE_URL="http://www.liveonlinefooty.com/schedule.php":
def DisplaySchedule(sender):
    """Build a MediaContainer listing every scheduled match and its channels.

    Downloads SCHEDULE_URL, cuts the page into the per-match
    ``<div id="mycss?">`` rows, pulls the kick-off time, date and match name
    out of each row with regexes, and appends one WebVideoItem per
    ``watchlive`` link found in that row.

    sender -- the menu item that triggered this callback (not used here).

    Returns the populated MediaContainer. Rows whose markup does not match
    the expected table layout are skipped instead of aborting the listing.
    """
    container = MediaContainer(viewGroup="InfoList")

    ## Define regex patterns for parsing schedule info.
    ## Raw strings keep the backslashes intact; compiling once keeps the
    ## pattern set-up out of the loops below.
    channelPattern = re.compile(r'<a href="watchlive/\?(.*?)">(.*?)</a>')
    rowAgainPattern = re.compile(r'<div id="mycss.">(.*?)</div>', re.DOTALL)
    # The table pattern spans several physical lines of HTML, so the line
    # breaks are spelled out as \n. The day of the month is captured with
    # (\d+) so single-digit days ("1st") work as well as "16th".
    tablePattern = re.compile(
        r'<t.*>\n'
        r'.*<tr>\n'
        r'.*<td.*>(.*):(.*?)</td>\n'
        r'.*<td.*>(...) (\d+).. (...)</td>\n'
        r'.*<td.*><span.*>(.*)</span><br/>(.*)<br /><br />'
    )

    ## Open the schedule URL
    response = HTTP.Request(SCHEDULE_URL)
    # NOTE(review): presumably framework v2 returns a request object whose
    # page text is in .content, while v1 returns the text itself -- confirm
    # against the framework version in use. getattr() covers both cases.
    schedule = getattr(response, 'content', response)

    # Get the local timezone offset (seconds west of UTC)
    tz = time.timezone

    # Parse each row in the table to get the match info
    for rowIterate in rowAgainPattern.findall(schedule):
        matchIterate = tablePattern.search(rowIterate)
        if matchIterate is None:
            # Row layout not recognised; skip it rather than raise
            # AttributeError on None and lose the whole schedule.
            continue

        matchhour, matchmins = matchIterate.group(1, 2)
        matchdate, matchmont, matchname = matchIterate.group(4, 5, 6)

        # NOTE(review): the year is hard-coded to '10' (2010), as in the
        # original post; the page itself does not carry a year.
        stamp = matchmont + '/' + matchdate + '/10 ' + matchhour + ':' + matchmins + ':00'
        kickoff = time.mktime(time.strptime(stamp, "%b/%d/%y %H:%M:%S"))
        matchtime = time.ctime(kickoff - tz)

        # Parse each found channel to create a channel viewing item
        for channelIterate in channelPattern.findall(rowIterate):
            # channelIterate is (query string, link text).
            # NOTE(review): the URL is built from the link text, as in the
            # original -- confirm whether the query string was intended.
            url = WATCHURL + channelIterate[1]
            container.Append(WebVideoItem(title=matchname, subtitle=matchtime + ' ' + channelIterate[1], url=url))

    return container
Yes, I have the import re. I don't know the XPath stuff at all, but I'm not strongly opposed to it; it's just that I know the regex I have is correct in plain Python terms, just not in Plex plugin Python.
Without regexing, using Xpath, your code can look like this (untested):
def DisplaySchedule(sender):<br />
<br />
dir = MediaContainer(viewGroup="InfoList")<br />
<br />
# Open the schedule URL<br />
# When using PMS framework v1:<br />
schedule = XML.ElementFromURL(SCHEDULE_URL, errors='ignore', isHTML=True)<br />
# Or when using PMS framework v2:<br />
#schedule = HTML.ElementFromURL(SCHEDULE_URL, errors='ignore')<br />
<br />
for row in schedule.xpath('//div[@id="mycss0" or @id="mycss1"]/table'):<br />
title = row.xpath('.//td[3]/span[@class="title"]')[0].text.strip() # Example: Juventus vs Man City<br />
comp = row.xpath('.//td[3]/text()[1]')[0].strip() # Example: UEFA Europa League<br />
<br />
time = row.xpath('.//td[1]')[0].text # Example: 18:00<br />
date = row.xpath('.//td[2]')[0].text # Example: Thu 16th Dec<br />
date = date.split(" ") # Example: ['Thu', '16th', 'Dec']<br />
<br />
day = date[1][0:-2] # Get the day of the month and chop off last 2 characters (st/nd/rd/th)<br />
month = time.strptime(date[2], "%b").tm_mon<br />
<br />
# We can't get the year from the website, so determine the right year by checking if the scraped month is smaller than the current month<br />
current_month = int(time.strftime("%m"))<br />
year = int(time.strftime("%Y"))<br />
if month < current_month:<br />
# Month is in the next year<br />
year = year + 1<br />
<br />
timestamp = time.mktime(time.strptime(' '.join([str(day), str(month), str(year), time]), "%d %m %Y %H:%M")) - time.timezone<br />
tuple = time.localtime(timestamp)<br />
subtitle = time.strftime('%a %d %b, %Y; %H:%M', t)<br />
<br />
for channel in row.xpath('.//a[contains(@href, "watchlive")]'):<br />
url = WATCHURL + channel.get('href')<br />
live_channel = channel.text<br />
dir.Append(WebVideoItem(url, title=title + ' (' + comp + ')', subtitle=subtitle + ' ' + live_channel))<br />
<br />