platform-helpers/proxy-logs.js

'use strict';

const lodash = require('lodash');
const rootRequire = require('rpcm-root-require');
const Utils = rootRequire('/platform-helpers/utils');
const LoggerWrapper = rootRequire('/platform-helpers/log4js-wrapper');
const logger = LoggerWrapper.getLogger();
rootRequire('/platform-helpers/string-extensions');

/**
 * Represents network requests captured by [mitmproxy](https://mitmproxy.org/).
 *
 * Returned by [proxyHelper.getLogs]{@link module:proxyHelper.getLogs}
 *
 * @hideconstructor
*/
class ProxyLogs {
  // no JSDoc here, only relevant for developers working on the tool
  constructor (har, steps) {
    /**
     * The raw HAR log information from the network capture.
     *
     * @type {object}
     * @example
     *{
     *  "log": {
     *     "version": "1.2",
     *     "creator": {
     *       "name": "mitmproxy har_dump",
     *       "version": "0.1",
     *       "comment": "mitmproxy version mitmproxy 6.0.2"
     *     },
     *     "entries": [
     *       {
     *         "startedDateTime": "2021-01-25T13:29:55.020851+00:00",
     *         "time": 640,
     *         "request": {
     *           "method": "GET",
     *           "url": "https://tags.tiqcdn.com/utag/tiqapp/utag.v.js?a=tealium-solutions/test-example/202005291402&cb=1611581394205",
     *           "httpVersion": "HTTP/1.1",
     *           "cookies": [],
     *           "headers": [
     *             {
     *               "name": "User-Agent",
     *               "value": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:84.0) Gecko/20100101 Firefox/84.0"
     *             },
     *             {
     *               "name": "Accept",
     *               "value": "*\/*"
     *             },
     *             {
     *               "name": "Accept-Language",
     *               "value": "en-US,en;q=0.5"
     *             },
     *             {
     *               "name": "Accept-Encoding",
     *               "value": "gzip, deflate, br"
     *             },
     *             {
     *               "name": "Referer",
     *               "value": "https://solutions.tealium.net/hosted/webdriver-testing/standard-integration-test.html"
     *             },
     *             {
     *               "name": "Host",
     *               "value": "tags.tiqcdn.com"
     *             },
     *             {
     *               "name": "Via",
     *               "value": "1.1 maki231 (squid/4.6)"
     *             },
     *             {
     *               "name": "Cache-Control",
     *               "value": "max-age=0"
     *             },
     *             {
     *               "name": "Connection",
     *               "value": "keep-alive"
     *             },
     *             {
     *               "name": "X-SL-Job-ID",
     *               "value": "3e756637232241df8514810fc6da95dc"
     *             },
     *             {
     *               "name": "X-SL-Tunnel-ID",
     *               "value": "a985b541abf04f18a3ef447451c81fc1"
     *             },
     *             {
     *               "name": "X-SL-Chef-IP",
     *               "value": "10.113.8.9"
     *             }
     *           ],
     *           "queryString": [
     *             {
     *               "name": "a",
     *               "value": "tealium-solutions/test-example/202005291402"
     *             },
     *             {
     *               "name": "cb",
     *               "value": "1611581394205"
     *             }
     *           ],
     *           "headersSize": 599,
     *           "bodySize": 0
     *         },
     *         "response": {
     *           "status": 200,
     *           "statusText": "OK",
     *           "httpVersion": "HTTP/1.1",
     *           "cookies": [],
     *           "headers": [
     *             {
     *               "name": "Accept-Ranges",
     *               "value": "bytes"
     *             },
     *             {
     *               "name": "Content-Type",
     *               "value": "application/x-javascript"
     *             },
     *             {
     *               "name": "ETag",
     *               "value": "\"7bc0ee636b3b83484fc3b9348863bd22:1460653071\""
     *             },
     *             {
     *               "name": "Last-Modified",
     *               "value": "Thu, 14 Apr 2016 16:57:51 GMT"
     *             },
     *             {
     *               "name": "Server",
     *               "value": "AkamaiNetStorage"
     *             },
     *             {
     *               "name": "Content-Length",
     *               "value": "2"
     *             },
     *             {
     *               "name": "Cache-Control",
     *               "value": "max-age=600"
     *             },
     *             {
     *               "name": "Expires",
     *               "value": "Mon, 25 Jan 2021 13:39:55 GMT"
     *             },
     *             {
     *               "name": "Date",
     *               "value": "Mon, 25 Jan 2021 13:29:55 GMT"
     *             },
     *             {
     *               "name": "Connection",
     *               "value": "keep-alive"
     *             }
     *           ],
     *           "content": {
     *             "size": 2,
     *             "compression": 0,
     *             "mimeType": "application/x-javascript",
     *             "text": "//"
     *           },
     *           "redirectURL": "",
     *           "headersSize": 422,
     *           "bodySize": 2
     *         },
     *         "cache": {},
     *         "timings": {
     *           "send": 15,
     *           "receive": 8,
     *           "wait": 318,
     *           "connect": 56,
     *           "ssl": 243
     *         }
     *       }
     *     ]
     *   }
     * }
     */
    this.rawLogs = har;

    // Normalize every HAR entry for step matching. Shallow copies are built instead of
    // editing the entries in place, so the HAR object handed in by the caller (which is
    // also exposed as `this.rawLogs`) keeps its original timestamps and shape.
    let logs = har.log.entries.map((entry) => {
      // keep the timestamp up to millisecond precision; whatever follows (extra
      // fractional digits, offset suffix) is dropped and replaced by 'Z'
      const match = entry.startedDateTime.match(/^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3}).*$/);
      // intentionally fragile, in case the log format changes this should break
      const startedDateTime = match[1] + 'Z';
      // the URL up to (not including) the first '?'
      const request = Object.assign({}, entry.request, {
        urlWithoutQueryString: entry.request.url.split('?')[0]
      });
      return Object.assign({}, entry, { startedDateTime, request });
    });

    logs = addFeaturesToLogs(logs);

    /**
     * An object with detailed network request info, listed per step.
     *
     * Each request will appear in the appropriate step array AND the allSteps array, to help make assertions simpler.
     *
     * Collect requests additionally get their payload parsed into `request.postData.parsed` (see the example below).
     *
     * In the interest of brevity, the example below doesn't show the same request in 'allSteps'.  The 'Accept' headers in this example have been modified to avoid breaking the JSDoc comments.
     *
     * @type {object}
     * @example
     * {
        "allSteps": [ '(omitted for brevity in this example)' ],
        "step1": [{
          "startedDateTime": "2021-01-28T13:04:06.982Z",
          "time": 2225,
          "request": {
            "method": "POST",
            "url": "https://collect-eu-central-1.tealiumiq.com/tealium-solutions/test-example/2/i.gif",
            "httpVersion": "HTTP/1.1",
            "cookies": [],
            "headers": {
              "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:85.0) Gecko/20100101 Firefox/85.0",
              "accept": "*\/*",
              "accept-language": "en-US,en;q=0.5",
              "accept-encoding": "gzip, deflate, br",
              "content-type": "multipart/form-data; boundary=---------------------------1787141617783135763328970803",
              "content-length": "1902",
              "origin": "https://solutions.tealium.net",
              "referer": "https://solutions.tealium.net/hosted/webdriver-testing/standard-integration-test.html",
              "host": "collect-eu-central-1.tealiumiq.com",
              "via": "1.1 maki3615 (squid/4.6)",
              "cache-control": "max-age=900",
              "connection": "keep-alive",
              "x-sl-job-id": "6c043688c72a4b93b67246927cf7527a",
              "x-sl-tunnel-id": "d85beb87f4d74aee9e1fefc7f0038523",
              "x-sl-chef-ip": "10.129.1.183"
            },
            "queryString": {},
            "headersSize": 813,
            "bodySize": 1902,
            "postData": {
              "mimeType": "multipart/form-data; boundary=---------------------------1787141617783135763328970803",
              "text": "-----------------------------1787141617783135763328970803\r\nContent-Disposition: form-data; name=\"data\"\r\n\r\n{\"loader.cfg\":{\"2\":{\"load\":4,\"send\":1,\"v\":202005291402,\"wait\":1,\"tid\":20064,\"id\":\"2\",\"executed\":1}},\"data\":{\"page_type\":\"first_test\",\"cp.utag_main_v_id\":\"0177491805bd00100897c23c7fad00052005500f00718\",\"cp.utag_main__pn\":\"1\",\"cp.utag_main_ses_id\":\"1611839047103\",\"cp.utag_main__ss\":\"1\",\"cp.utag_main__se\":\"1\",\"cp.utag_main__sn\":\"1\",\"cp.utag_main__st\":\"1611840847103\",\"dom.referrer\":\"\",\"dom.title\":\"Integration Test\",\"dom.domain\":\"solutions.tealium.net\",\"dom.query_string\":\"\",\"dom.hash\":\"\",\"dom.url\":\"https://solutions.tealium.net/hosted/webdriver-testing/standard-integration-test.html\",\"dom.pathname\":\"/hosted/webdriver-testing/standard-integration-test.html\",\"dom.viewport_height\":671,\"dom.viewport_width\":1024,\"ut.domain\":\"tealium.net\",\"ut.version\":\"ut4.46.202005291402\",\"ut.event\":\"view\",\"ut.visitor_id\":\"0177491805bd00100897c23c7fad00052005500f00718\",\"ut.session_id\":\"1611839047103\",\"ut.account\":\"tealium-solutions\",\"ut.profile\":\"test-example\",\"ut.env\":\"prod\",\"tealium_event\":\"first_test\",\"tealium_visitor_id\":\"0177491805bd00100897c23c7fad00052005500f00718\",\"tealium_session_id\":\"1611839047103\",\"tealium_session_number\":\"1\",\"tealium_session_event_number\":\"1\",\"tealium_datasource\":\"7hpfk3\",\"tealium_account\":\"tealium-solutions\",\"tealium_profile\":\"test-example\",\"tealium_environment\":\"prod\",\"tealium_random\":\"7091299497493456\",\"tealium_library_name\":\"utag.js\",\"tealium_library_version\":\"4.46.0\",\"tealium_timestamp_epoch\":1611839047,\"tealium_timestamp_utc\":\"2021-01-28T13:04:07.106Z\",\"tealium_timestamp_local\":\"2021-01-28T13:04:07.106\",\"cp.utag_main_dc_visit\":\"1\",\"cp.utag_main_dc_event\":\"1\"},\"browser\":{\"height\":671,\"width\":1024,\"screen_height\":768,\"screen_width\":102
4,\"timezone_offset\":0},\"event\":\"view\",\"post_time\":1611839047115}\r\n-----------------------------1787141617783135763328970803--\r\n",
              "params": [],
              "parsed": {
                "loader.cfg": {
                  "2": {
                    "load": 4,
                    "send": 1,
                    "v": 202005291402,
                    "wait": 1,
                    "tid": 20064,
                    "id": "2",
                    "executed": 1
                  }
                },
                "data": {
                  "page_type": "first_test",
                  "cp.utag_main_v_id": "0177491805bd00100897c23c7fad00052005500f00718",
                  "cp.utag_main__pn": "1",
                  "cp.utag_main_ses_id": "1611839047103",
                  "cp.utag_main__ss": "1",
                  "cp.utag_main__se": "1",
                  "cp.utag_main__sn": "1",
                  "cp.utag_main__st": "1611840847103",
                  "dom.referrer": "",
                  "dom.title": "Integration Test",
                  "dom.domain": "solutions.tealium.net",
                  "dom.query_string": "",
                  "dom.hash": "",
                  "dom.url": "https://solutions.tealium.net/hosted/webdriver-testing/standard-integration-test.html",
                  "dom.pathname": "/hosted/webdriver-testing/standard-integration-test.html",
                  "dom.viewport_height": 671,
                  "dom.viewport_width": 1024,
                  "ut.domain": "tealium.net",
                  "ut.version": "ut4.46.202005291402",
                  "ut.event": "view",
                  "ut.visitor_id": "0177491805bd00100897c23c7fad00052005500f00718",
                  "ut.session_id": "1611839047103",
                  "ut.account": "tealium-solutions",
                  "ut.profile": "test-example",
                  "ut.env": "prod",
                  "tealium_event": "first_test",
                  "tealium_visitor_id": "0177491805bd00100897c23c7fad00052005500f00718",
                  "tealium_session_id": "1611839047103",
                  "tealium_session_number": "1",
                  "tealium_session_event_number": "1",
                  "tealium_datasource": "7hpfk3",
                  "tealium_account": "tealium-solutions",
                  "tealium_profile": "test-example",
                  "tealium_environment": "prod",
                  "tealium_random": "7091299497493456",
                  "tealium_library_name": "utag.js",
                  "tealium_library_version": "4.46.0",
                  "tealium_timestamp_epoch": 1611839047,
                  "tealium_timestamp_utc": "2021-01-28T13:04:07.106Z",
                  "tealium_timestamp_local": "2021-01-28T13:04:07.106",
                  "cp.utag_main_dc_visit": "1",
                  "cp.utag_main_dc_event": "1"
                },
                "browser": {
                  "height": 671,
                  "width": 1024,
                  "screen_height": 768,
                  "screen_width": 1024,
                  "timezone_offset": 0
                },
                "event": "view",
                "post_time": 1611839047115
              }
            },
            "urlWithoutQueryString": "https://collect-eu-central-1.tealiumiq.com/tealium-solutions/test-example/2/i.gif"
          },
          "response": {
            "status": 200,
            "statusText": "OK",
            "httpVersion": "HTTP/1.1",
            "cookies": [
              {
                "name": "TAPID",
                "value": "tealium-solutions/test-example>0177491805bd00100897c23c7fad00052005500f00718|",
                "path": "/",
                "domain": ".tealiumiq.com",
                "httpOnly": true,
                "secure": true,
                "expires": "2022-01-28T13:04:08+00:00"
              }
            ],
            "headers": {
              "date": "Thu, 28 Jan 2021 13:04:08 GMT",
              "content-type": "image/gif",
              "content-length": "43",
              "connection": "keep-alive",
              "x-acc": "tealium-solutions:test-example:2:datacloud",
              "x-did": "0177491805bd00100897c23c7fad00052005500f00718",
              "x-region": "eu-central-1",
              "access-control-allow-origin": "https://solutions.tealium.net",
              "x-serverid": "uconnect_i-06054a44183803790",
              "pragma": "no-cache",
              "p3p": "policyref=\"/w3c/p3p.xml\", CP=\"NOI DSP COR NID CUR ADM DEV OUR BUS\"",
              "access-control-expose-headers": "X-Region",
              "cache-control": "no-transform,private,no-cache,no-store,max-age=0,s-maxage=0",
              "x-tid": "0177491805bd00100897c23c7fad00052005500f00718",
              "access-control-allow-credentials": "true",
              "x-ulver": "ed533b75a08fa8f6edbe6695d0295a01b07dd99c-SNAPSHOT",
              "vary": "Origin",
              "expires": "Thu, 28 Jan 2021 13:04:08 GMT",
              "x-uuid": "24ba2778-6a42-412b-9dbc-94dacff69bed",
              "set-cookie": "TAPID=tealium-solutions/test-example>0177491805bd00100897c23c7fad00052005500f00718|; Path=/; Domain=.tealiumiq.com; Expires=Fri, 28-Jan-2022 13:04:08 GMT; Max-Age=31536000; Secure; HttpOnly; SameSite=None"
            },
            "content": {
              "size": 43,
              "compression": 0,
              "mimeType": "image/gif",
              "text": "R0lGODlhAQABAPAAAAAAAAAAACH5BAEAAAAALAAAAAABAAEAAAICRAEAOw==",
              "encoding": "base64"
            },
            "redirectURL": "",
            "headersSize": 1184,
            "bodySize": 43
          },
          "cache": {},
          "timings": {
            "send": 121,
            "receive": 33,
            "wait": 1061,
            "connect": 831,
            "ssl": 179
          },
          "stepNumber": 1,
          "stepName": "step1"
        }]
      }
    */
    this.logs = sortLogsByStep(logs, steps);

    /**
     * The steps that were recognized and used to split the network capture.
     *
     * @example
     * {
     *   "stepsSoFar": 4,
     *   "stepInfo": {
     *     "1": {
     *       "start": "2021-01-25T13:29:53.042Z",
     *       "name": "STEP 1 - initial page visit, set (and verify) Trace cookie, confirm some globals and helpers",
     *       "end": "2021-01-25T13:29:59.378Z"
     *     },
     *     "2": {
     *       "start": "2021-01-25T13:29:59.378Z",
     *       "name": "STEP 2 - reload the page, then increment the counter 4 times",
     *       "end": "2021-01-25T13:30:23.820Z"
     *     },
     *     "3": {
     *       "start": "2021-01-25T13:30:23.820Z",
     *       "name": "STEP 3 - decrement the counter 5 times",
     *       "end": "2021-01-25T13:30:45.128Z"
     *     },
     *     "4": {
     *       "start": "2021-01-25T13:30:45.128Z",
     *       "name": "STEP 4 - spoof a login and file import, setting the counter to 42"
     *     }
     *   }
     * }
     */
    this.steps = steps;

    /**
     * Returns a filtered subset of the 'logs' object, keeping only the entries whose request URL matches.
     *
     * A string filter is passed to the RegExp constructor, so backslashes need double-escaping, like `\\w+`,
     * see [doc](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/RegExp).
     * A RegExp instance is accepted as well.
     *
     * Entries whose URL ends with `:443` are treated as SSL handshakes and are never returned.
     *
     * @param {string|RegExp} filterStringForRegex A double-escaped string (or a RegExp) that is used as the URL filter
     * @returns {object} A filtered 'logs' object with only matching entries, as in [ProxyLogs]{@link module:proxyHelper~ProxyLogs}
     * @example
     * it('should find a single TiQ session counter, in the first step', function () {
     *   chai.expect(proxyLogs.getFilteredLogs('/utag.v.js?').allSteps).to.have.lengthOf(1)
     *   chai.expect(proxyLogs.getFilteredLogs('/utag.v.js?').step1).to.have.lengthOf(1)
     * })
     */
    this.getFilteredLogs = function (filterStringForRegex) {
      const filtered = {};
      // make sure all steps have an array, even if it's empty
      const highestStep = this.steps.stepsSoFar;
      for (let i = 1; i <= highestStep; i++) {
        filtered[`step${i}`] = [];
      }
      filtered.allSteps = [];

      const re = new RegExp(filterStringForRegex);
      const sslHandshakeRe = /:443$/;
      this.logs.allSteps.forEach((logEntry) => {
        const url = logEntry.request.url;
        // a pattern carrying the 'g' or 'y' flag remembers where the previous test() stopped;
        // reset it so every URL is tested from its start
        re.lastIndex = 0;
        if (!re.test(url) || sslHandshakeRe.test(url)) {
          return;
        }
        const key = logEntry.stepName;
        // the bucket is missing when the entry's step lies outside 1..stepsSoFar
        if (!Array.isArray(filtered[key])) {
          filtered[key] = [];
        }
        filtered[key].push(logEntry);
        filtered.allSteps.push(logEntry);
      });
      return filtered;
    };

    /**
     * Splits the log entries into per-step arrays based on each entry's start time.
     *
     * Every entry of the returned object is a deep copy of the input entry, annotated with
     * `stepNumber` and `stepName` ('step' + stepNumber). Each entry is listed both in its own
     * step array and in `allSteps`; `allSteps` is ordered by ascending step number, keeping the
     * input order within a step.
     *
     * @private
     * @param {array} inputLogArray log entries, each with a `startedDateTime` that Date.parse understands
     * @param {object} steps step info with `stepsSoFar` (highest step number) and `stepInfo` (keyed by step number, with `start`/`end`)
     * @returns {object} an object with `allSteps` plus one array per step (`step1` .. `step<stepsSoFar>`)
     */
    function sortLogsByStep (inputLogArray, steps) {
      // work on a deep copy so the caller's entries are not annotated
      const logArray = lodash.cloneDeep(inputLogArray);
      const sorted = {};

      // ensure all steps are defined with empty arrays at least
      sorted.allSteps = [];
      for (let i = 1; i <= steps.stepsSoFar; i++) {
        sorted[`step${i}`] = [];
      }

      /**
       * Finds the number of the step an entry belongs to.
       *
       * @param {object} entry a log entry with `startedDateTime`
       * @returns {number} the step number, between 1 and steps.stepsSoFar
       */
      function findStepNumber (entry) {
        const entryTime = Date.parse(entry.startedDateTime);

        // stop short of the last one
        for (let i = 1; i < steps.stepsSoFar; i++) {
          const step = steps.stepInfo[i];
          const start = Date.parse(step.start);
          const end = Date.parse(step.end);
          const inRange = entryTime >= start && entryTime < end;
          if (inRange) {
            return i;
          }
        }

        // greater or equal to the last step start means it's part of the last step
        if (entryTime >= Date.parse(steps.stepInfo[steps.stepsSoFar].start)) {
          return steps.stepsSoFar;
        }

        // otherwise, it must be stuff like browser setup before the start of the first step, put that as part of the first step
        return 1;
      }

      logArray.forEach((logEntry) => {
        const stepNumber = findStepNumber(logEntry);
        logEntry.stepNumber = stepNumber;
        logEntry.stepName = `step${stepNumber}`;
        // file the entry under its real step name; steps without any entries keep their
        // empty array and must not cause later steps to shift down
        sorted[logEntry.stepName].push(logEntry);
      });

      for (let i = 1; i <= steps.stepsSoFar; i++) {
        sorted[`step${i}`].forEach((stepEntry) => {
          sorted.allSteps.push(stepEntry);
        });
      }
      return sorted;
    }

    /**
     * Returns a deep copy of the log entries in a shape that is easier to assert on.
     *
     * For every entry, the request headers, response headers and request query string are turned
     * from name/value lists into plain objects (via reformatObject). For multipart Collect
     * requests and for requests to the /event endpoint, the body is additionally parsed and
     * stored as `request.postData.parsed`.
     *
     * @private
     * @param {array} inputLogArray HAR entries (left untouched)
     * @returns {array} the enriched deep copy of the entries
     */
    function addFeaturesToLogs (inputLogArray) {
      const collectUrlRe = /^https?:\/\/collect.*\/2\/i\.gif$/;
      const eventUrlRe = /^https:\/\/collect\.tealiumiq\.com\/event[/]?$/;

      /**
       * Extracts the JSON payload from a Collect multipart body.
       *
       * @param {string} body the raw multipart text
       * @param {string} boundary the multipart boundary taken from the content-type header
       * @returns {object|undefined} the parsed payload, `{ failed: true }` if it isn't valid JSON, or undefined if no payload line was found
       */
      function parseCollectPayload (body, boundary) {
        let collectBody;
        body.split('\r\n').forEach((line) => {
          if (line.indexOf(boundary) === -1 && /^\{.*\}$/.test(line)) {
            try {
              collectBody = JSON.parse(line);
            } catch (e) {
              collectBody = { failed: true };
            }
          }
        });
        return collectBody;
      }

      // avoid side effects with a deep copy
      const logArray = lodash.cloneDeep(inputLogArray);

      logArray.forEach((entry) => {
        const request = entry.request;

        // reformat into more useful objects (and lowercase the keys)
        // [{ name: 'Protocol', value: 'https' }] becomes
        // { 'protocol': 'https'}
        request.headers = reformatObject(request.headers);
        entry.response.headers = reformatObject(entry.response.headers);
        request.queryString = reformatObject(request.queryString);

        // requests without a body (for example a CORS preflight OPTIONS) carry no postData,
        // in which case there is nothing to parse
        const postData = request.postData;
        const contentType = request.headers['content-type'];

        const isMultipartFormData = contentType && contentType.indexOf('multipart/form-data;') !== -1;
        const isCollect = isMultipartFormData && collectUrlRe.test(request.url);
        // hack to parse the payload for the Collect tag specifically, ignoring other tags for now because multipart form data is
        // hard to parse generally (and all the libraries expect buffers, received from file uploads) - Collect forms are pretty simple
        if (isCollect && postData && typeof postData.text === 'string') {
          const boundary = contentType.split('boundary=')[1];
          postData.parsed = parseCollectPayload(postData.text, boundary);
        }

        // Add /events endpoint parsing as well for POST requests
        if (postData && eventUrlRe.test(request.url)) {
          try {
            postData.parsed = JSON.parse(postData.text);
          } catch (e) {
            logger.error('proxy-logs.js ----> addFeaturesToLogs ----> Failed to parse {0}{1}{2}'.format(postData.text, Utils.getNewLine(2), e));
          }
        }
      });
      return logArray;
    }

    /**
     * Collapses a list of name/value pairs into a plain lookup object.
     *
     * Names are lowercased to form the keys. When a name occurs more than once,
     * the value of its last occurrence is kept.
     *
     * Input:
     * [
     *   {
     *     'name': 'Status',
     *     'value': '200'
     *    }
     *  ]
     *
     * Output:
     * {
     *   'status': '200'
     * }
     * @private
     * @param {array} inputArray an array of objects with keys 'name' and 'value'
     * @returns {object} an object mapping each lowercased name to its value
     */
    function reformatObject (inputArray) {
      return inputArray.reduce((lookup, pair) => {
        lookup[pair.name.toLowerCase()] = pair.value;
        return lookup;
      }, {});
    }
  }
}

module.exports = ProxyLogs;