Fixed aliases

implements bp member-directory

Change-Id: I1a2ee49276316c05a9fd064bb1ffa39c2f2e9606
This commit is contained in:
pkholkin 2014-05-13 13:45:11 +04:00
parent 2314a454df
commit c601822563
5 changed files with 94 additions and 92 deletions

View File

@ -5693,12 +5693,11 @@
{ {
"domains": [""], "domains": [""],
"company_name": "*independent", "company_name": "*independent",
"aliases": ["None", "Non", "l-", ".", "****", "1", "aaa", "-", "dsadsadsadsad", "I dont have one", "company", "n/a", "Self", "Student", "home", "Home Based", "Independent", "Independen", "Independant", "MyHome", "HomeOffice", "Self Employeed", "Self Employed", "myself", "Self-employeed", "individual", "Individual Contributor", "Unaffiliated", "没有"] "aliases": ["None", "Non", "l-", ".", "****", "1", "aaa", "-", "dsadsadsadsad", "I dont have one", "company", "n/a", "Self", "Student", "home", "Home Based", "Independent", "Independen", "Independant", "MyHome", "HomeOffice", "Self Employeed", "Self Employed", "myself", "Self-employeed", "individual", "Individual Contributor", "Unaffiliated", "没有", "Null", "Univerisity", "fsfsf", "xxx"]
}, },
{ {
"domains": ["360.cn"], "domains": ["360.cn"],
"company_name": "Qihoo 360 Technology Co", "company_name": "Qihoo 360 Technology Co"
"aliases": ["Qihoo 360 Technology Co. Ltd."]
}, },
{ {
"domains": ["3ds.com"], "domains": ["3ds.com"],
@ -5706,8 +5705,7 @@
}, },
{ {
"domains": ["4loops.com"], "domains": ["4loops.com"],
"company_name": "Four Loops Solutions", "company_name": "Four Loops Solutions"
"aliases": ["Four Loops Solutions Pvt. Ltd.", "Four Loops Solutions Pvt Ltd", "Four Loops Solutions Pvt. Ltd"]
}, },
{ {
"domains": ["99cloud.net"], "domains": ["99cloud.net"],
@ -5724,8 +5722,7 @@
}, },
{ {
"domains": ["alyseo.com"], "domains": ["alyseo.com"],
"company_name": "Alyseo", "company_name": "Alyseo"
"aliases": ["ALYSEO"]
}, },
{ {
"domains": ["anl.gov"], "domains": ["anl.gov"],
@ -5745,18 +5742,15 @@
}, },
{ {
"domains": ["aristanetworks.com"], "domains": ["aristanetworks.com"],
"company_name": "Arista Networks", "company_name": "Arista Networks"
"aliases": ["Arista Networks Inc"]
}, },
{ {
"domains": ["arubanetworks.com"], "domains": ["arubanetworks.com"],
"company_name": "Aruba Networks", "company_name": "Aruba Networks"
"aliases": ["Aruba Networks, Inc."]
}, },
{ {
"domains": ["askbot.com"], "domains": ["askbot.com"],
"company_name": "Askbot", "company_name": "Askbot"
"aliases": ["Askbot, S.p.A."]
}, },
{ {
"domains": ["atomia.com"], "domains": ["atomia.com"],
@ -5768,13 +5762,11 @@
}, },
{ {
"domains": ["awcloud.com"], "domains": ["awcloud.com"],
"company_name": "Awcloud", "company_name": "Awcloud"
"aliases": ["awcloud"]
}, },
{ {
"domains": ["b1-systems.de"], "domains": ["b1-systems.de"],
"company_name": "B1 Systems", "company_name": "B1 Systems"
"aliases": ["B1 Systems GmbH"]
}, },
{ {
"domains": ["bacoosta.com"], "domains": ["bacoosta.com"],
@ -5782,12 +5774,12 @@
}, },
{ {
"domains": ["bestbuy.com"], "domains": ["bestbuy.com"],
"company_name": "Best Buy", "company_name": "Best Buy"
"aliases": ["Best Buy Corp."]
}, },
{ {
"domains": ["bigswitch.com"], "domains": ["bigswitch.com"],
"company_name": "Big Switch Networks" "company_name": "Big Switch Networks",
"aliases": ["Big Switch"]
}, },
{ {
"domains": ["bitergia.com"], "domains": ["bitergia.com"],
@ -5805,7 +5797,7 @@
{ {
"domains": ["brightcomputing.com"], "domains": ["brightcomputing.com"],
"company_name": "Bright Computing", "company_name": "Bright Computing",
"aliases": ["Bright Computing, BV", "Bright Computing, Inc."] "aliases": ["Bright Computing, BV"]
}, },
{ {
"domains": ["brinkster.com"], "domains": ["brinkster.com"],
@ -5833,7 +5825,7 @@
{ {
"domains": ["canonical.com"], "domains": ["canonical.com"],
"company_name": "Canonical", "company_name": "Canonical",
"aliases": ["Canonical Ltd"] "aliases": ["Canoncail, Ltd."]
}, },
{ {
"domains": ["centraldesktop.com"], "domains": ["centraldesktop.com"],
@ -5850,7 +5842,7 @@
{ {
"domains": ["cisco.com"], "domains": ["cisco.com"],
"company_name": "Cisco Systems", "company_name": "Cisco Systems",
"aliases": ["Cisco System", "Cisco Systems", "Cisco Systems Inc.", "Cisco Systems, Inc.", "Cisco Systems Inc., Intel, Microsoft, Dorkbotz", "Cisco System, Inc., Nebula, Inc.", "Cisco", "Cisco Inc"] "aliases": ["Cisco System", "Cisco Systems Inc., Intel, Microsoft, Dorkbotz", "Cisco System, Inc., Nebula, Inc.", "Cisco", "Cisco Inc"]
}, },
{ {
"domains": ["citrix.com"], "domains": ["citrix.com"],
@ -5863,12 +5855,11 @@
{ {
"domains": ["cloudbasesolutions.com"], "domains": ["cloudbasesolutions.com"],
"company_name": "Cloudbase Solutions", "company_name": "Cloudbase Solutions",
"aliases": ["Cloudbase Solutions Srl", "Cloudbase"] "aliases": ["Cloudbase"]
}, },
{ {
"domains": ["cloudbau.de"], "domains": ["cloudbau.de"],
"company_name": "Cloudbau", "company_name": "Cloudbau"
"aliases": ["cloudbau GmbH"]
}, },
{ {
"domains": ["cloudscaling.com"], "domains": ["cloudscaling.com"],
@ -5910,8 +5901,7 @@
}, },
{ {
"domains": ["cybera.ca"], "domains": ["cybera.ca"],
"company_name": "Cybera", "company_name": "Cybera"
"aliases": ["Cybera Inc"]
}, },
{ {
"domains": ["debian.org"], "domains": ["debian.org"],
@ -5921,7 +5911,7 @@
{ {
"domains": ["dell.com", "software.dell.com"], "domains": ["dell.com", "software.dell.com"],
"company_name": "Dell", "company_name": "Dell",
"aliases": ["Dell & Ganette Publishing", "Dell Inc", "Dell, Inc., Cabarrus County Schools"] "aliases": ["Dell & Ganette Publishing", "Dell, Inc., Cabarrus County Schools", "Dell & Ganette Publishing"]
}, },
{ {
"domains": ["denali-systems.com"], "domains": ["denali-systems.com"],
@ -5946,8 +5936,7 @@
}, },
{ {
"domains": ["ebay.com", "ebaysf.com"], "domains": ["ebay.com", "ebaysf.com"],
"company_name": "eBay", "company_name": "eBay"
"aliases": ["ebay inc", "eBay Inc.", "eBay, Inc."]
}, },
{ {
"domains": ["embrane.com"], "domains": ["embrane.com"],
@ -5956,7 +5945,7 @@
{ {
"domains": ["emc.com"], "domains": ["emc.com"],
"company_name": "EMC", "company_name": "EMC",
"aliases": ["EMC corp", "EMC Corporation", "EMC Corportion", "EMC employee; Russian Cloud Computing Professional Association - Head of executive commitee", "EMC, VMWare"] "aliases": ["EMC Corportion", "EMC employee; Russian Cloud Computing Professional Association - Head of executive commitee", "EMC, VMWare"]
}, },
{ {
"domains": ["endurancewindpower.com"], "domains": ["endurancewindpower.com"],
@ -5964,8 +5953,7 @@
}, },
{ {
"domains": ["enovance.com"], "domains": ["enovance.com"],
"company_name": "eNovance", "company_name": "eNovance"
"aliases": ["eNovance Inc"]
}, },
{ {
"domains": ["epam.com"], "domains": ["epam.com"],
@ -5974,7 +5962,7 @@
{ {
"domains": ["ericsson.com"], "domains": ["ericsson.com"],
"company_name": "Ericsson", "company_name": "Ericsson",
"aliases": ["Ericsson AB", "Ericsson Research"] "aliases": ["Ericsson AB", "Ericsson Research", "Ericcson AB"]
}, },
{ {
"domains": ["fathomdb.com"], "domains": ["fathomdb.com"],
@ -5991,8 +5979,7 @@
}, },
{ {
"domains": ["fujitsu.com"], "domains": ["fujitsu.com"],
"company_name": "Fujitsu", "company_name": "Fujitsu"
"aliases": ["Fujitsu Limited"]
}, },
{ {
"domains": ["getchef.com", "opscode.com"], "domains": ["getchef.com", "opscode.com"],
@ -6004,8 +5991,7 @@
}, },
{ {
"domains": ["godaddy.com"], "domains": ["godaddy.com"],
"company_name": "Go Daddy", "company_name": "Go Daddy"
"aliases": ["GoDaddy", "Go Daddy, LLC"]
}, },
{ {
"domains": ["gplhost.com"], "domains": ["gplhost.com"],
@ -6048,7 +6034,7 @@
{ {
"domains": ["hds.com"], "domains": ["hds.com"],
"company_name": "Hitachi", "company_name": "Hitachi",
"aliases": ["Hitachi Data Systems", "Hitachi, Ltd.", "Hitachi,Ltd."] "aliases": ["Hitachi Data Systems"]
}, },
{ {
"domains": ["hortonworks.com"], "domains": ["hortonworks.com"],
@ -6057,7 +6043,7 @@
{ {
"domains": ["hp.com"], "domains": ["hp.com"],
"company_name": "HP", "company_name": "HP",
"aliases": ["HP Cloud", "HP ES GD China", "HP, IBM", "HP Software", "HP Storage Division", "Hewlett Packard", "Hewlett-Packard Company", "Hewlett-Packard", "Hewllet-Packard"] "aliases": ["HP Cloud", "HP ES GD China", "HP, IBM", "HP Software", "HP Storage Division", "Hewlett Packard", "Hewlett-Packard Company", "Hewlett-Packard", "Hewllet-Packard", "HP R and D", "HP Cloud OS", "HP Networking", "hewelett-packard company", "HewlettPackard", "Hewlett-Pack"]
}, },
{ {
"domains": ["huawei.com"], "domains": ["huawei.com"],
@ -6067,7 +6053,7 @@
{ {
"domains": ["ibm.com", "linux.vnet.ibm.com"], "domains": ["ibm.com", "linux.vnet.ibm.com"],
"company_name": "IBM", "company_name": "IBM",
"aliases": ["IBM Australia", "IBM Canada", "IBM Canada Ltd", "IBM China", "IBM Corporation", "IBM India Pvt Ltd", "IBM India Pvt. Ltd.", "IBM Japan, Ltd.", "IBM Research", "IBM Research - China", "IBM Research Lab, India"] "aliases": ["IBM Australia", "IBM Canada", "IBM Canada Ltd", "IBM China", "IBM Corporation", "IBM India Pvt Ltd", "IBM India Pvt. Ltd.", "IBM Japan, Ltd.", "IBM Research", "IBM Research - China", "IBM Research Lab, India", "IBM Deutschland Research & Development GmbH", "International Business Machines Corporation"]
}, },
{ {
"domains": ["ifca.unican.es"], "domains": ["ifca.unican.es"],
@ -6091,7 +6077,7 @@
{ {
"domains": ["intel.com"], "domains": ["intel.com"],
"company_name": "Intel", "company_name": "Intel",
"aliases": ["Intel Corp.", "Intel Corporation", "Intel Media", "Intel OTC", "Intern at intel"] "aliases": ["Intel Media", "Intel OTC", "Intern at intel", "Intel Security"]
}, },
{ {
"domains": ["interhost.no"], "domains": ["interhost.no"],
@ -6133,8 +6119,7 @@
}, },
{ {
"domains": ["izeltech.com"], "domains": ["izeltech.com"],
"company_name": "Izel Technologies", "company_name": "Izel Technologies"
"aliases": ["Izel Technologies Inc."]
}, },
{ {
"domains": ["jhuapl.edu"], "domains": ["jhuapl.edu"],
@ -6194,8 +6179,7 @@
}, },
{ {
"domains": ["maginatics.com"], "domains": ["maginatics.com"],
"company_name": "Maginatics", "company_name": "Maginatics"
"aliases": ["Maginatics, Inc."]
}, },
{ {
"domains": ["managedit.ie"], "domains": ["managedit.ie"],
@ -6207,8 +6191,7 @@
}, },
{ {
"domains": ["memset.com"], "domains": ["memset.com"],
"company_name": "Memset", "company_name": "Memset"
"aliases": ["Memset Ltd"]
}, },
{ {
"domains": ["metacloud.com"], "domains": ["metacloud.com"],
@ -6220,8 +6203,7 @@
}, },
{ {
"domains": ["mirantis.com", "mirantis.ru"], "domains": ["mirantis.com", "mirantis.ru"],
"company_name": "Mirantis", "company_name": "Mirantis"
"aliases": ["Mirantis Inc", "Mirantis Inc.", "Mirantis, Inc", "Mirantis, Inc.", "Mirantis IT"]
}, },
{ {
"domains": ["mit.edu"], "domains": ["mit.edu"],
@ -6244,17 +6226,16 @@
{ {
"domains": ["nebula.com", "ansolabs.com"], "domains": ["nebula.com", "ansolabs.com"],
"company_name": "Nebula", "company_name": "Nebula",
"aliases": ["Nebula Inc.", "Nebula, Inc. ; CFO Tools"] "aliases": ["Nebula, Inc. ; CFO Tools", "Nebulaworks"]
}, },
{ {
"domains": ["nec.com", "nec.co.jp", "nectechnologies.in"], "domains": ["nec.com", "nec.co.jp", "nectechnologies.in"],
"company_name": "NEC", "company_name": "NEC",
"aliases": ["NEC Europe Ltd.", "NEC Soft, Ltd.", "NEC Technologies India Ltd."] "aliases": ["NEC Europe Ltd.", "NEC Soft, Ltd.", "NEC Technologies India Ltd.", "NEC Technlogies India Ltd"]
}, },
{ {
"domains": ["netapp.com"], "domains": ["netapp.com"],
"company_name": "NetApp", "company_name": "NetApp"
"aliases": ["NetApp Inc", "NetApp, Inc."]
}, },
{ {
"domains": ["netease.com"], "domains": ["netease.com"],
@ -6279,8 +6260,7 @@
}, },
{ {
"domains": ["nuagenetworks.net"], "domains": ["nuagenetworks.net"],
"company_name": "Nuage Networks", "company_name": "Nuage Networks"
"aliases": ["nuage networks"]
}, },
{ {
"domains": ["numergy.com", "numergy.fr"], "domains": ["numergy.com", "numergy.fr"],
@ -6293,7 +6273,7 @@
{ {
"domains": ["oneconvergence.com"], "domains": ["oneconvergence.com"],
"company_name": "One Convergence", "company_name": "One Convergence",
"aliases": ["One Convergence Devices Pvt. Ltd", "One Convergence Inc.", "OneConvergence", "Oneconvergence Devices Pvt Ltd", "One Convergence Devices"] "aliases": ["One Convergence Devices Pvt. Ltd", "Oneconvergence Devices Pvt Ltd", "One Convergence Devices"]
}, },
{ {
"domains": ["optiflows.com"], "domains": ["optiflows.com"],
@ -6301,8 +6281,7 @@
}, },
{ {
"domains": ["oracle.com"], "domains": ["oracle.com"],
"company_name": "Oracle", "company_name": "Oracle"
"aliases": ["Oracle Corp."]
}, },
{ {
"domains": ["orange.com"], "domains": ["orange.com"],
@ -6320,12 +6299,12 @@
{ {
"domains": ["persistent.co.in"], "domains": ["persistent.co.in"],
"company_name": "Persistent Systems", "company_name": "Persistent Systems",
"aliases": ["Persistent Systems Limited"] "aliases": ["Persistent System Limited", "persistent sys limited", "Persistent Ltd"]
}, },
{ {
"domains": ["pistoncloud.com"], "domains": ["pistoncloud.com"],
"company_name": "Piston Cloud", "company_name": "Piston Cloud",
"aliases": ["Piston Cloud Computing, Inc."] "aliases": ["Piston Cloud Computing, Inc.", "Piston"]
}, },
{ {
"domains": ["playhaven.com"], "domains": ["playhaven.com"],
@ -6333,8 +6312,7 @@
}, },
{ {
"domains": ["plumgrid.com"], "domains": ["plumgrid.com"],
"company_name": "PLUMgrid", "company_name": "PLUMgrid"
"aliases": ["Plumgrid inc", "Plumgrid Inc."]
}, },
{ {
"domains": ["pubyun.com"], "domains": ["pubyun.com"],
@ -6357,14 +6335,17 @@
"company_name": "Rackspace", "company_name": "Rackspace",
"aliases": ["Rackspace, Cloudscaling, Korea Telcom, friends with lots of people", "Rackspace.com", "Rackspace Hosting"] "aliases": ["Rackspace, Cloudscaling, Korea Telcom, friends with lots of people", "Rackspace.com", "Rackspace Hosting"]
}, },
{
"domains": ["rackwareinc.com"],
"company_name": "Rackware"
},
{ {
"domains": ["radisys.com"], "domains": ["radisys.com"],
"company_name": "Radisys" "company_name": "Radisys"
}, },
{ {
"domains": ["radware.com"], "domains": ["radware.com"],
"company_name": "Radware", "company_name": "Radware"
"aliases": ["Radware Ltd."]
}, },
{ {
"domains": ["ravellosystems.com"], "domains": ["ravellosystems.com"],
@ -6373,7 +6354,7 @@
{ {
"domains": ["redhat.com", "gluster.com"], "domains": ["redhat.com", "gluster.com"],
"company_name": "Red Hat", "company_name": "Red Hat",
"aliases": ["Red Hat Canada, Inc", "Red Hat Czech, s.r.o.", "Red Hat Inc.", "Red Hat, Inc., Bloomberg L.P.", "Red Hat India Pvt. Ltd.", "Red Hat Software", "RedHat"] "aliases": ["Red Hat Canada, Inc", "Red Hat Czech, s.r.o.", "Red Hat, Inc., Bloomberg L.P.", "Red Hat India Pvt. Ltd.", "Red Hat Software"]
}, },
{ {
"domains": ["reduxio.com"], "domains": ["reduxio.com"],
@ -6394,8 +6375,7 @@
}, },
{ {
"domains": ["scality.com"], "domains": ["scality.com"],
"company_name": "Scality", "company_name": "Scality"
"aliases": ["Scality Inc"]
}, },
{ {
"domains": ["sdsc.edu"], "domains": ["sdsc.edu"],
@ -6417,8 +6397,7 @@
}, },
{ {
"domains": ["snabb.co"], "domains": ["snabb.co"],
"company_name": "Snabb", "company_name": "Snabb"
"aliases": ["Snabb GmbH"]
}, },
{ {
"domains": ["softlayer.com"], "domains": ["softlayer.com"],
@ -6435,8 +6414,7 @@
}, },
{ {
"domains": ["spilgames.com"], "domains": ["spilgames.com"],
"company_name": "Spil Games", "company_name": "Spil Games"
"aliases": ["Spil Games B.V."]
}, },
{ {
"domains": ["stackinsider.com"], "domains": ["stackinsider.com"],
@ -6465,13 +6443,11 @@
}, },
{ {
"domains": ["swiftstack.com"], "domains": ["swiftstack.com"],
"company_name": "SwiftStack", "company_name": "SwiftStack"
"aliases": ["SwiftStack Inc."]
}, },
{ {
"domains": ["switch.ch"], "domains": ["switch.ch"],
"company_name": "Switch", "company_name": "Switch"
"aliases": ["SWITCH"]
}, },
{ {
"domains": ["symantec.com"], "domains": ["symantec.com"],
@ -6488,18 +6464,16 @@
}, },
{ {
"domains": ["telekom.de"], "domains": ["telekom.de"],
"company_name": "Deutsche Telekom", "company_name": "Deutsche Telekom"
"aliases": ["Deutsche Telekom AG"]
}, },
{ {
"domains": ["tesora.com", "parelastic.com"], "domains": ["tesora.com", "parelastic.com"],
"company_name": "Tesora Corp", "company_name": "Tesora Corp",
"aliases": ["ParElastic Corp", "ParElastic"] "aliases": ["ParElastic Corp", "ParElastic", "Tesora.com"]
}, },
{ {
"domains": ["thalesgroup.com", "mythalesgroup.com"], "domains": ["thalesgroup.com", "mythalesgroup.com"],
"company_name": "Thales", "company_name": "Thales"
"aliases": ["Thales Group"]
}, },
{ {
"domains": ["thoughtworks.com"], "domains": ["thoughtworks.com"],
@ -6515,8 +6489,7 @@
}, },
{ {
"domains": ["tunnelvisionlabs.com"], "domains": ["tunnelvisionlabs.com"],
"company_name": "Tunnel Vision Laboratories", "company_name": "Tunnel Vision Laboratories"
"aliases": ["Tunnel Vision Laboratories, LLC"]
}, },
{ {
"domains": ["ubisoft.com"], "domains": ["ubisoft.com"],
@ -6578,7 +6551,7 @@
{ {
"domains": ["vmware.com", "nicira.com"], "domains": ["vmware.com", "nicira.com"],
"company_name": "VMware", "company_name": "VMware",
"aliases": ["CYSO VMWARE DHPA"] "aliases": ["CYSO VMWARE DHPA", "VMware, Nicira, Telstra, Accenture"]
}, },
{ {
"domains": ["wanclouds.net"], "domains": ["wanclouds.net"],
@ -6608,13 +6581,11 @@
}, },
{ {
"domains": ["xlab.si"], "domains": ["xlab.si"],
"company_name": "Xlab", "company_name": "Xlab"
"aliases": ["XLAB d.o.o."]
}, },
{ {
"domains": ["yahoo-inc.com"], "domains": ["yahoo-inc.com"],
"company_name": "Yahoo!", "company_name": "Yahoo!"
"aliases": ["Yahoo"]
}, },
{ {
"domains": ["yandex-team.ru"], "domains": ["yandex-team.ru"],

View File

@ -124,7 +124,11 @@ def _store_companies(runtime_storage_inst, companies):
if 'aliases' in company: if 'aliases' in company:
for alias in company['aliases']: for alias in company['aliases']:
domains_index[alias] = company['company_name'] normalized_alias = utils.normalize_company_name(alias)
domains_index[normalized_alias] = company['company_name']
normalized_company_name = utils.normalize_company_name(
company['company_name'])
domains_index[normalized_company_name] = company['company_name']
runtime_storage_inst.set_by_key('companies', domains_index) runtime_storage_inst.set_by_key('companies', domains_index)
@ -175,7 +179,7 @@ def _get_changed_member_records(runtime_storage_inst, record_processor_inst):
if record['record_type'] == 'member' and 'company_name' in record: if record['record_type'] == 'member' and 'company_name' in record:
company_draft = record['company_draft'] company_draft = record['company_draft']
company_name = record_processor_inst.domains_index.get( company_name = record_processor_inst.domains_index.get(
company_draft) or company_draft utils.normalize_company_name(company_draft)) or company_draft
if company_name != record['company_name']: if company_name != record['company_name']:
record['company_name'] = company_name record['company_name'] = company_name

View File

@ -430,7 +430,8 @@ class RecordProcessor(object):
record['module'] = 'unknown' record['module'] = 'unknown'
company_draft = record['company_draft'] company_draft = record['company_draft']
company_name = self.domains_index.get(company_draft) or company_draft company_name = self.domains_index.get(utils.normalize_company_name(
company_draft)) or company_draft
# author_email is a key to create new user # author_email is a key to create new user
record['author_email'] = user_id record['author_email'] = user_id

View File

@ -189,3 +189,16 @@ def make_module_group(module_group_id, name=None, modules=None, tag='module'):
'module_group_name': name or module_group_id, 'module_group_name': name or module_group_id,
'modules': modules or [module_group_id], 'modules': modules or [module_group_id],
'tag': tag} 'tag': tag}
# Legal-form tokens removed from a company name wherever they occur as a
# whole word (matched case-insensitively).
BAD_NAME_SUFFIXES = ['Ltd', 'Pvt', 'Inc', 'GmbH', 'AG', 'Corporation', 'Corp',
                     'Company', 'Co', 'Group', 'Srl', 'Limited', 'LLC', 'IT']
# Dotted legal-form abbreviations. They are removed only when they start the
# name or follow whitespace, because `\b` cannot anchor a token ending in '.'.
BAD_NAME_SUFFIXES_WITH_STOPS = ['S.p.A.', 's.r.o.', 'L.P.', 'B.V.', 'K.K.',
                                'd.o.o.']


def _build_bad_suffix_regex():
    """Compile the pattern that matches every removable legal-form token."""
    # re.escape is essential for the dotted forms: an unescaped '.' matches
    # any character, so 'B.V.' would also swallow words such as "Bevy".
    plain = '|'.join(re.escape(token) for token in BAD_NAME_SUFFIXES)
    dotted = '|'.join(re.escape(token)
                      for token in BAD_NAME_SUFFIXES_WITH_STOPS)
    pattern = (r'\b(?:' + plain + r')\b'
               r'|(?:^|\s)(?:' + dotted + r')')
    return re.compile(pattern, re.IGNORECASE)


# Built once at import time; the token lists above are treated as constants.
_BAD_SUFFIX_RE = _build_bad_suffix_regex()


def normalize_company_name(name):
    """Return a canonical lookup key for a company name.

    Legal-form tokens listed in BAD_NAME_SUFFIXES and
    BAD_NAME_SUFFIXES_WITH_STOPS are removed, then every character that is
    not alphanumeric is dropped and the remainder is lower-cased, e.g.
    'Red Hat, Inc.' -> 'redhat'.

    A name made up solely of removable tokens and punctuation normalizes to
    the empty string.

    :param name: company name as entered by a user or listed as an alias
    :returns: lower-case alphanumeric string usable as an index key
    """
    stripped = _BAD_SUFFIX_RE.sub('', name)
    return ''.join(c.lower() for c in stripped if c.isalnum())

View File

@ -77,3 +77,16 @@ class TestUtils(testtools.TestCase):
{'index': 1, 'name': 'C'}] {'index': 1, 'name': 'C'}]
self.assertEqual(expected, utils.add_index( self.assertEqual(expected, utils.add_index(
sequence, start=0, item_filter=lambda x: x['name'] != 'B')) sequence, start=0, item_filter=lambda x: x['name'] != 'B'))
def test_normalize_company_name(self):
    """Check utils.normalize_company_name against known name/key pairs.

    The inputs cover plain suffixes ('Corporation', 'corp', 'IT', 'Inc',
    'co', 'ltd'), dotted suffixes ('s.r.o.', 'L.P.') and dotted sequences
    glued to other characters ('2s.r.o.', 'AL.P.B') that must be kept.
    """
    company_names = ['EMC Corporation', 'Abc, corp..', 'Mirantis IT.',
                     'Red Hat, Inc.', 'abc s.r.o. ABC', '2s.r.o. co',
                     'AL.P.B L.P. s.r.o. s.r.o. C ltd.']
    correct_normalized_company_names = ['emc', 'abc', 'mirantis',
                                        'redhat', 'abcabc', '2sro',
                                        'alpbc']
    normalized_company_names = [utils.normalize_company_name(name)
                                for name in company_names]

    # Expected value goes first, as in the add_index assertion of this
    # test case, so a failure report labels the two sides correctly.
    self.assertEqual(correct_normalized_company_names,
                     normalized_company_names)