Fix pattern in member page parser

Change-Id: Iff564b781de3562e0bb41be7f4dfa955d36ae9a5
This commit is contained in:
Ilya Shakhat 2015-01-17 00:33:04 +03:00
parent 55cc1877b2
commit 953ab3164c
2 changed files with 7 additions and 7 deletions

View File

@@ -26,7 +26,7 @@ from stackalytics.processor import utils
LOG = logging.getLogger(__name__)
NAME_AND_DATE_PATTERN = (r'<h3>(?P<member_name>[^<]*)[\s\S]*?'
r'<div class="span-7 last">(?P<date_joined>[^<]*)')
r'<div class="span-\d last">(?P<date_joined>[^<]*)')
COMPANY_PATTERN = (r'<strong>Date\sJoined[\s\S]*?<b>(?P<company_draft>[^<]*)'
r'[\s\S]*?From\s(?P<date_from>[\s\S]*?)\(Current\)')
GARBAGE_PATTERN = r'[/\\~%^\*_]+'

View File

@@ -36,16 +36,16 @@ class TestMps(testtools.TestCase):
<div class="last name-and-title">
<h3>Jim Battenberg</h3>
</div>
<hr><div class="span-3"><strong>Date Joined</strong></div>
<div class="span-7 last">June 25, 2013 <br><br></div>
<div class="span-3"><strong>Affiliations</strong></div>
<div class="span-7 last">
<hr><div class="span-4"><strong>Date Joined</strong></div>
<div class="span-6 last">June 25, 2013 <br><br></div>
<div class="span-4"><strong>Affiliations</strong></div>
<div class="span-6 last">
<div>
<b>Rackspace</b> From (Current)
</div>
</div>
<div class="span-3"><strong>Statement of Interest </strong></div>
<div class="span-7 last">
<div class="span-4"><strong>Statement of Interest </strong></div>
<div class="span-6 last">
<p>contribute logic and evangelize openstack</p>
</div>
<p>&nbsp;</p>'''