Warning:
This wiki has been archived and is now read-only.

Best Practices/Theme program

From Share-PSI EC Project
Jump to: navigation, search
import requests
import matplotlib.pyplot as plt

from wordcloud import WordCloud, STOPWORDS
import sys


# All pages share one wiki path, so the common prefix is written once and
# joined with each page name to form the full URL.
_WIKI_ROOT = 'http://www.w3.org/2013/share-psi/wiki/Best_Practices/'

# Page names exactly as they appear in the URL (percent-encoding included).
_PAGE_NAMES = (
    'Cross_Agency_Strategy',
    'High_Level_Support',
    'Holistic_Metrics',
    'User_engagement_and_collaboration_throughout_the_lifecycle',
    'Organisational-internal_engagement',
    'Human_Readability_and_Machine_Processing',
    'Cost_of_Publication',
    'Stakeholders%E2%80%99_Interests_and_Rights',
    'Feedback_to_Improve_Quality',
    'Optimization_for_Search_Engines',
    'Publication_with_Common_Metadata',
    'Catalogs_and_Indexes',
    'Encourage_crowdsourcing',
    'Publish_spatial_data_on_the_web',
    'Monitoring_and_Benchmarking',
    'Open_Data_quality_assessment',
    'Identifying_what_you_already_publish',
    'Make_the_data_available_in_the_language_people_want_it',
    'Management_Of_A_Wide_Public_Actors_Network',
    'Making_Research_Results_Open_For_The_Country',
    'Using_Business_Process_Paradigm_For_Open_Data_Lifecycle_Management',
    'Publishing_Statistical_Data_In_Linked_Data_Format',
    'Supervizor_-_An_Indispensable_Open_Government_Application_(Transparency_Of_Public_Spending)',
    'Civic_Use_Of_Open_Data',
    'Open_Data_Publication_Plan',
    'A_Federation_Tool_For_Opendata_Portals',
    'Traffic_Light_System_For_Data_Sharing',
    'Open_Data_To_Improve_Sharing_And_Publication_Of_Information_Between_Public_Administrations',
    'Commercial_Considerations_in_Open_Data_Portal_Design',
    'Infomediary_Sector_Characteristics',
    'Open_Data_2.0_-_Changing_Perspectives',
    'Open_Data_Business_Model_Patterns_and_Open_Data_Business_Value_Disciplines',
    'The_Central_Role_of_Location',
    'An_ongoing_open_dialog_in_an_open_data_ecosystem',
    'Discover_published_information_by_site_scraping',
    'Free_our_maps',
)

# Full list of page URLs, in the same order as the page names above.
uris = [_WIKI_ROOT + page_name for page_name in _PAGE_NAMES]

# Strings that the processing loop deletes from each page's text before the
# word cloud is generated. They are kept in two groups and concatenated in
# the original order.

# Section names; each is wrapped in '=' below to form entries like '=Title='.
_SECTION_NAMES = (
    'Title',
    'Short Description',
    'Overview',
    'Why',
    'Intended Outcome',
    'Life Cycle Stage',
    'Possible Approach',
    'How to Test',
    'Evidence',
    'Lifecycle Stage',
    'Audience',
    'Related Best Practices',
    'Tags',
    'Status',
    'Intended Audience',
)

# Remaining fixed phrases, used verbatim.
_FIXED_PHRASES = (
    "nowiki",
    "Name of the Share-PSI workshop:",
    "Title of the Best Practice:",
    "Outline of the best practice",
    "Management summary",
    "Challenge",
    "Solution.",
    "Best Practice Identification",
    "Why is this a Best Practice?",
    "What's the impact of the Best Practice?",
    "Link to the PSI Directive",
    "Why is there a need for this Best Practice?",
    "What do you need for this Best Practice?",
    "Applicability by other member states?",
    "Contact info - record of the person to be contacted for additional information or advice.",
)

replacements = ['=' + section + '=' for section in _SECTION_NAMES]
replacements.extend(_FIXED_PHRASES)
                
# When True, echo the request URL and the page text (before and after
# cleaning) and stop after the first page that was processed.
debug = True

# set.add() returns None, so `stopwords=STOPWORDS.add("data")` handed None to
# WordCloud and only had an effect by mutating the library-wide STOPWORDS
# set. Build an explicit, private stop-word set instead.
stopwords = STOPWORDS | {"data"}

# The page text is lower-cased before cleaning, so the phrases to strip must
# be lower-cased too; otherwise every mixed-case entry never matches.
boilerplate = [phrase.lower() for phrase in replacements]

# The generator's settings are the same for every page, so build it once.
wc = WordCloud(background_color="white", max_words=2000, stopwords=stopwords)

for uri in uris:
    raw_url = uri + "?action=raw"
    # Output image is named after the last path segment of the page URL.
    fname = uri.split('/')[-1] + '.png'
    if debug:
        print(raw_url)

    # Bound the wait and reject HTTP error responses so that an error page
    # is never turned into a word cloud; a failed page is reported and
    # skipped so the remaining pages are still processed.
    try:
        response = requests.get(raw_url, timeout=30)
        response.raise_for_status()
    except requests.RequestException as err:
        sys.stderr.write("Skipping " + uri + ": " + str(err) + "\n")
        continue

    text = response.text.lower()
    if debug:
        print(text)
    for phrase in boilerplate:
        text = text.replace(phrase, '')
    if debug:
        print(text)

    wc.generate(text)

    # figsize is a property of the figure, not an option of savefig(), so
    # the size is set when the figure is created.
    fig = plt.figure(figsize=(7, 6.5))
    plt.imshow(wc)
    plt.axis("off")
    plt.savefig(fname, dpi=200)
    # Close the figure explicitly so figures do not accumulate across pages.
    plt.close(fig)

    if debug:
        sys.exit(0)

print("Finished")