Resolving Go Import URLs

The package path that you import may not directly correlate to a repository URL. If Go can not find the package in your GOPATH, it will load the URL, redirecting as necessary, and will either look for a “meta” tag with a “name” attribute having value “go-import” or take whatever URL we are at when no more HTTP redirects are necessary (if any).

To speed things up, I wrote a short Python tool to do this and stashed it in a gist.

Make sure to install the dependencies:

  • requests
  • beautifulsoup4

Source:

import sys
import argparse
import urllib.parse

import requests
import bs4

def _get_cycle(url):
    while 1:
        print("Reading: {}".format(url))

        r = requests.get(url, allow_redirects=False)
        r.raise_for_status()

        bs = bs4.BeautifulSoup(r.text, 'html.parser')

        def meta_filter(tag): 
            # We're looking for meta-tags like this:
            #
            # <meta name="go-import" content="googlemaps.github.io/maps git https://github.com/googlemaps/google-maps-services-go">
            
            return \
                tag.name == 'meta' and \
                tag.attrs.get('name') == 'go-import'

        for m in bs.find_all(meta_filter):
            phrase = m.attrs['content']
            _, vcs, repo_url_root = phrase.split(' ')
            if vcs != 'git':
                continue

            return repo_url_root

        next_url = r.headers.get('Location')
        if next_url is None:
            break

        p = urllib.parse.urlparse(next_url)
        if p.netloc == '':
            # Take the schema, hostname, and port from the last URL.
            p2 = urllib.parse.urlparse(url)
            updated_url = '{}://{}{}'.format(p2.scheme, p2.netloc, next_url)
            print("  [{}] => [{}]".format(next_url, updated_url))

            url = updated_url
        else:
            url = next_url

    return url

def _main():
    description = "Determine the import URL for the given Go import path"
    parser = argparse.ArgumentParser(description=description)
    
    parser.add_argument(
        'import_path',
        help='Go import path')

    args = parser.parse_args()

    initial_url = "https://{}".format(args.import_path)
    final_url = _get_cycle(initial_url)

    print("Final URL: [{}]".format(final_url))

if __name__ == '__main__':
    _main()