Skip to content

Commit

Permalink
Updated README
Browse files Browse the repository at this point in the history
  • Loading branch information
Glitch (address-form) committed Oct 3, 2023
1 parent e9389b7 commit bf4655b
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 65 deletions.
14 changes: 14 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
*~
.bash_history
.DS_Store
.swp
.vscode/
client_secret.json
default.profraw
node_modules
oauth2.keys.json
keys.json
credentials.json
old
tmp
data
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# URL parts

View the site, origin, and other parts of a URL.
View the site, origin, and other parts of a URL: [url-parts.glitch.me](https://url-parts.glitch.me).


# Comments and suggestions
Expand Down
148 changes: 85 additions & 63 deletions js/main.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
/* Copyright 2023 Google LLC.
SPDX-License-Identifier: Apache-2.0 */

// Public Suffix List: https://publicsuffix.org/list/

// Public Suffix List: https://publicsuffix.org/list/
import pslEntries from './psl.js';
// https://data.iana.org/TLD/tlds-alpha-by-domain.txt
Expand All @@ -23,7 +25,7 @@ if (urlParam) {
handleUrl();

function handleUrl() {
let urlText = urlInput.value;
const urlText = urlInput.value;
// console.log('urlText:', urlText);

// Begin by removing `?url= ...` search string.
Expand Down Expand Up @@ -59,9 +61,9 @@ function handleUrl() {
// Hack to allow URLs without scheme.
url = urlText.match(/^https?:\/\//) ? new URL(urlText) :
new URL(`https://${urlText}`);
} catch {
console.log(`${urlText} is not a valid URL`);
urlPartsDiv.innerHTML = '';
} catch (error) {
console.log(`${urlText} is not a valid URL`, error);
urlPartsDiv.innerHTML = 'Not a valid URL.';
return;
}

Expand Down Expand Up @@ -117,72 +119,89 @@ function handleUrl() {
return;
}

let etldRegExp;
// Get the eTLD and eTLD+1.
const etld = pslEntries.find((el) => {
etldRegExp = new RegExp(`\\w+.${el}$`);
// if (el === 'co.uk') {
// }
return hostname.match(etldRegExp);
});
let etld1;
if (etld) {
const etld1RegExp = new RegExp(`[^\/\.]+\.${etld}`);
etld1 = urlText.match(etld1RegExp) && urlText.match(etld1RegExp)[0];
// Get the eTLD: this is the longest entry in the PSL that matches the end of the hostname.
// Note that the PSL includes single-part entries (com, au, etc.)
// as well as multi-part entries (currently up to five parts).
// All assigned TLDs in the Root Zone Database are in the PSL.

let etld = '';
for (const pslEntry of pslEntries) {
// Hostname is not valid if it matches a PSL entry.
if (hostname === pslEntry) {
urlPartsDiv.innerHTML = `Not a valid URL: hostname <span id="input-hostname">${hostname}</span> is an ` +
`eTLD (see the <a href="https://publicsuffix.org/">Public Suffix List</a>).`;
return;
}

// Check for match at end of hostname only.
// Need to add \\. to avoid accepting hostnames that end in a valid (e)TLD, such as 'web.xcom'.
const pslEntryRegExp = new RegExp(`\\.${pslEntry.replaceAll('.', '\.')}$`);
// Find the longest eTLD in the PSL that matches the hostname (e.g. 'co.uk' rather than just 'co').
if (hostname.match(pslEntryRegExp) && pslEntry.length > etld.length) {
etld = pslEntry;
}
}

console.log('etld', etld);

if (!etld) {
urlPartsDiv.innerHTML = `No eTLD from the <a href="https://publicsuffix.org/">Public Suffix List</a>` +
` found in hostname <span id="input-hostname">${hostname}</span>.`;
return;
}

const etld1 = hostname.match(`[^\/\.]+\.${etld}`)[0];

if (!etld1) {
replace(`eTLD ${etld} specified, but no eTLD+1.`)
}


// The spans need to wrap the URL from the outside in:
// origin > originWithoutPort > hostname > site > eTLD+1 > eTLD > TLD.

urlPartsDiv.innerHTML = urlText.
replace(origin, `<span id="origin">${origin}</span>`);

// According to the URL standard, site must now include a scheme, so add a
// dotted border between the TLD+1 or eTLD+1 and the scheme.
// Although the URL standard now mandates that a site must include a scheme,
// span#site only wraps the eTLD+1.
// The scheme border is connected with the span#site border by a dotted border,
// by wrapping the whole origin (except the port) in span#site-dotted.
if (scheme) {
const siteDottedRegExp = new RegExp(`${scheme}.+${hostname}`);
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.
replace(siteDottedRegExp, '<span id="site-dotted">$&</span>');
}

// If the URL has a scheme, add a span to add a dashed border for site
// between the scheme and the rest of the site.
urlPartsDiv.innerHTML =
urlPartsDiv.innerHTML.replace(hostname, `<span id="hostname">${hostname}</span>`);

// Although the URL standard now mandates that a site must include a scheme,
// span#site only wraps the eTLD+1 or TLD+1.
// The scheme border is connected with the span#site border by a dotted border,
// by wrapping the whole origin (except the port) in span#site-dotted.
replace(etld1,
`<span id="etld1">${etld1}</span>`);

// If the URL uses an eTLD, add spans for eTLD+1 and eTLD.
if (etld) {
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(etld1,
`<span id="etld1">${etld1}</span>`);
// Site now requires scheme (according to the URL standard).
if (scheme) {
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(`<span id="etld1">${etld1}</span>`,
`<span id="etld1"><span id="site">${etld1}</span></span>`);
}
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(etld,
`<span id="etld">${etld}</span>`);
// Site now requires scheme.
} else if (scheme) {
// Not eTLD, so site is TLD+1.
// Site now requires scheme according to the URL standard,
// so a dotted line is added between the scheme and the other parts of site (see above).
if (scheme) {
replace(`<span id="etld1">${etld1}</span>`,
`<span id="etld1"><span id="site">${etld1}</span></span>`);
const site = hostname.split('.').slice(-2).join('.');
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(site, `<span id="site">${site}</span>`);
// replace(site, `<span id="site">${site}</span>`);
}

replace(etld,
`<span id="etld">${etld}</span>`);

// Wrap TLD in a span.
// If the hostname includes an eTLD, urlPartsDiv.innerHTML will be wrapped in a span.
// Otherwise, the whole hostname will be wrapped in a span.
const tld = hostname.split('.').pop();
// Check if tld is in the list at js/tld.js from the Root Zone Database.

// Double check that tld is in the list at js/tld.js from the Root Zone Database.
// All TLDs should also be in the PSL (checked earlier) so at this point the tld should always be valid.
if (tldEntries.includes(tld.toUpperCase())) {
const partBeforeTld = hostname.split('.').slice(-2, -1);
const tldRegExp = new RegExp(`${partBeforeTld}.(${tld})`);
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(tldRegExp,
partBeforeTld + '.<span id="tld">$1</span>');
// The TLD is the last part of span#etld
const tldRegExp = new RegExp(`(<span id="etld">[^<]*)(${tld})`);
replace(tldRegExp, '$1<span id="tld">$2</span>');
} else {
urlPartsDiv.innerHTML = 'TLD not found in the ' +
'<a href="https://www.iana.org/domains/root/db">Root Zone Database</a>.';
Expand All @@ -192,50 +211,53 @@ function handleUrl() {
// Hack: if the pathname is / then highlight the / after the origin
// (not a / after the scheme).
if (pathname === '/') {
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(/\/$/,
replace(/\/$/,
`<span id="pathname">/</span>`);
} else if (pathname) {
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(pathname,
replace(pathname,
`<span id="pathname">${pathname}</span>`);
}

if (filename) {
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(filename,
replace(filename,
`<span id="filename">${filename}</span>`);
}
if (hash) {
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(hash,
replace(hash,
`<span id="hash">${hash}</span>`);
}

// TODO: surprisingly complex to get this to work with other URL parts!
// if (password) {
// urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(`:${password}@`,
// `:<span id="password">${password}</span>@`);
// }
// // TODO: surprisingly complex to get this to work with other URL parts!
// // if (password) {
// replace(`:${password}@`,
// // `:<span id="password">${password}</span>@`);
// // }

if (port) {
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(`:${port}`,
replace(`:${port}`,
`:<span id="port">${port}</span>`);
}
if (scheme) {
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(scheme,
replace(scheme,
// `<span id="scheme">${scheme}</span>`);
`<span id="scheme"><span id="origin-scheme"><span id="site-scheme">` +
`${scheme}</span></span></span>`);
}
// If the URL has a hash value *and* a search string,
// the URL API (for hash) returns the hash and the search string.
if (search) {
urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(search,
replace(search,
`<span id="search">${search}</span>`);
}
}


// TODO: surprisingly complex to get this to work with other URL parts!
// if (username) {
// const usernameRegExp = new RegExp(`${username}([@:])`);
// urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(usernameRegExp,
// `<span id="username">${username}</span>$1`);
// }
};
// Utility functions

/**
 * Debugging aid: print a label followed by the markup currently held in
 * urlPartsDiv.
 *
 * @param {*} label - Value printed before the markup to identify the call site.
 */
function log(label) {
  const { innerHTML: currentMarkup } = urlPartsDiv;
  console.log(label, currentMarkup);
}

/**
 * Apply String.prototype.replace() to the markup in urlPartsDiv and write the
 * result back to urlPartsDiv.innerHTML.
 *
 * As with String.prototype.replace(), a string pattern (or a RegExp without
 * the `g` flag) only affects the first match.
 *
 * @param {string|RegExp} pattern - Literal text or regular expression to find.
 * @param {string} replacement - Markup to insert in place of the match.
 *   - RegExp pattern: special sequences such as `$1` and `$&` are expanded as
 *     usual.
 *   - String pattern: the replacement is inserted verbatim.
 */
function replace(pattern, replacement) {
  // Callers that pass a string pattern build the replacement from raw URL
  // parts (pathname, search, hash, ...). Those may contain `$` sequences such
  // as `$$` or `$&`, which a string replacement would expand and so corrupt
  // the displayed URL. A replacer function's return value is inserted
  // literally, so use one for string patterns only.
  const replacer = typeof pattern === 'string' ? () => replacement : replacement;
  urlPartsDiv.innerHTML = urlPartsDiv.innerHTML.replace(pattern, replacer);
}
2 changes: 1 addition & 1 deletion js/psl.js

Large diffs are not rendered by default.

0 comments on commit bf4655b

Please sign in to comment.