Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cdprecorder/_storage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
import os


# Default socket file name.
# NOTE(review): presumably joined with the directory returned by
# get_runtime_dir() by code outside this module -- confirm against callers.
DEFAULT_SOCKET_NAME = "erpeto.sock"


def get_runtime_dir() -> str:
    """Return the directory in which runtime files should be placed.

    The value of the ``XDG_RUNTIME_DIR`` environment variable is used when it
    is set to a non-empty string.  Otherwise the system temporary directory
    is returned, so that the declared ``str`` return type always holds
    (``os.getenv`` alone yields ``None`` for an unset variable).

    NOTE(review): the fallback directory is not guaranteed to be private to
    the current user the way ``XDG_RUNTIME_DIR`` is -- confirm this is
    acceptable for whatever gets stored there.

    Returns:
        A directory path as a string; never ``None``.
    """
    # Imported locally so this function does not depend on a module-level
    # import that the file does not have.
    import tempfile

    runtime_dir = os.getenv("XDG_RUNTIME_DIR")
    if runtime_dir:
        return runtime_dir
    return tempfile.gettempdir()
363 changes: 363 additions & 0 deletions cdprecorder/erpeto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,363 @@
from __future__ import annotations

from typing import cast, Optional, Union, TYPE_CHECKING

import bs4
import bs4.builder._htmlparser
import pycdp
import requests
import sys
import twisted.internet.reactor

from twisted.python.log import err
from twisted.internet import defer, threads
from twisted.internet.interfaces import IReactorCore
from pycdp import cdp

import cdprecorder
from cdprecorder import generate_python, logger
from cdprecorder.action import (
BrowserAction,
InputAction,
HttpAction,
LowercaseStr,
RequestAction,
ResponseAction,
response_action_from_python_response,
)
from cdprecorder.recorder import (
HttpCommunication,
RecorderOptions,
record,
)

import cdprecorder.analyser

if TYPE_CHECKING:
import bs4

from pycdp.cdp.util import T_JSON_DICT
from twisted.python.failure import Failure

from cdprecorder.type_checking import CdpEvent, HttpTarget


# ``twisted.internet.reactor`` is re-exposed here through ``typing.cast`` as
# an ``IReactorCore`` so static type checkers see a concrete interface.
# Background: https://github.com/twisted/twisted/issues/9909
reactor = cast(IReactorCore, twisted.internet.reactor)


def generate_action(action: HttpAction, prev_new_actions: list[Optional[HttpAction]]) -> RequestAction:
    """Build a fresh ``RequestAction`` from a recorded action.

    The new request starts as a shallow copy of ``action``; afterwards every
    entry of ``action.targets`` is applied to it, receiving the list of
    previously generated actions as well.

    Args:
        action: The recorded action to copy from.
        prev_new_actions: Actions generated so far, handed to each target.

    Returns:
        The newly created request action.
    """
    replica = RequestAction()
    replica.shallow_copy_from_action(action)

    for substitution in action.targets:
        substitution.apply(replica, prev_new_actions)

    return replica


def run_actions(actions: list[HttpAction], proxies: Optional[list[str]] = None) -> None:
    """Replay the recorded request actions over HTTP, in order.

    For each ``RequestAction`` a new request is generated with
    ``generate_action``, sent without following redirects, and both the
    generated request and the response built from the live reply are stored
    in a running list that later requests can draw on.  Recorded
    ``ResponseAction`` entries are skipped; any other action contributes a
    ``None`` placeholder to that list.

    Args:
        actions: The recorded actions to replay.
        proxies: Passed unchanged as the ``proxies`` argument of
            ``Session.send``.
            NOTE(review): annotated as a list here, while requests documents
            ``proxies`` as a mapping -- confirm the intended type.
    """
    replayed: list[Optional[HttpAction]] = []

    for recorded in actions:
        if not isinstance(recorded, RequestAction):
            # Recorded responses are not carried over; everything else that
            # is not a request only leaves a placeholder behind.
            if not isinstance(recorded, ResponseAction):
                replayed.append(None)
            continue

        request_action = generate_action(recorded, replayed)
        replayed.append(request_action)

        # A separate session is opened (and closed) for every request.
        with requests.Session() as session:
            prepared = requests.Request(
                method=request_action.method,
                url=request_action.url,
                headers=request_action.headers,
                data=request_action.body,
                cookies=request_action.cookies_to_dict(),
            ).prepare()
            logger.debug("Replicating request: %s", prepared)

            reply = session.send(prepared, allow_redirects=False, proxies=proxies)
            replayed.append(response_action_from_python_response(reply))

            print(f"{request_action.method} {request_action.url} - {reply.status_code}")


def to_cdp_event(event: CdpEvent) -> dict[str, Union[str, T_JSON_DICT]]:
    """Convert a parsed CDP event object into a plain dictionary.

    The CDP method name is found by a reverse lookup in
    ``cdp.util._event_parsers``: the first key whose value equals the class
    of ``event`` is used.

    Returns:
        A dict with the keys ``"method"`` (the looked-up name), ``"params"``
        (``event.to_json()``), ``"type"`` (always ``"recv"``) and
        ``"domain"`` (always ``"-"``).

    Raises:
        Exception: If no registered parser matches the event's class.
    """
    event_cls = event.__class__

    for method_name, registered_cls in cdp.util._event_parsers.items():
        if registered_cls == event_cls:
            return {
                "method": method_name,
                "params": event.to_json(),
                "type": "recv",
                "domain": "-",
            }

    raise Exception


def get_only_http_actions(actions: list[BrowserAction]) -> list[HttpAction]:
    """Return only the ``HttpAction`` instances from ``actions``.

    The relative order of the kept actions is preserved; the input list is
    not modified.

    Args:
        actions: Actions of any kind.

    Returns:
        A new list containing the elements that are ``HttpAction`` instances.
    """
    return [action for action in actions if isinstance(action, HttpAction)]


def _generate_events_with_redirects_extracted(events: list[CdpEvent]) -> list[CdpEvent]:
    """Return a copy of ``events`` with redirect responses made explicit.

    Whenever a ``RequestWillBeSent`` event carries a ``redirect_response``,
    a ``ResponseReceived`` event is synthesised from it and placed in the
    output *before* that request event.  If the redirect announces extra
    info (``redirect_has_extra_info``), the request event is additionally
    held back while the following events are passed through, until the
    pending ``...ExtraInfo`` events have been seen.

    Args:
        events: The recorded CDP events of one communication.

    Returns:
        A new list of events; the input list is not modified.
    """
    new_events = []
    # Events held back until it is known where they belong in the output.
    future_events: list[CdpEvent] = []
    # Flags telling which ``...ExtraInfo`` events of a redirect are pending.
    wait_response_extra = False
    wait_request_extra = False
    wait_extra = False
    for evt in events:
        if wait_extra:
            # An ExtraInfo event of a kind that is no longer awaited:
            # release the held-back events before it.
            if (
                isinstance(evt, cdp.network.RequestWillBeSentExtraInfo)
                and not wait_request_extra
                or isinstance(evt, cdp.network.ResponseReceivedExtraInfo)
                and not wait_response_extra
            ):
                wait_extra = False
                new_events += future_events
                future_events = []

            # Mark the matching kind as received and recompute whether
            # anything is still awaited.
            if isinstance(evt, cdp.network.RequestWillBeSentExtraInfo):
                wait_request_extra = False
            elif isinstance(evt, cdp.network.ResponseReceivedExtraInfo):
                wait_response_extra = False
            wait_extra = wait_response_extra or wait_request_extra

            # While waiting, the current event goes straight to the output.
            new_events.append(evt)
            continue
        else:
            new_events += future_events
            future_events = []

        future_events.append(evt)
        if not isinstance(evt, cdp.network.RequestWillBeSent):
            continue

        if not evt.redirect_response:
            # Plain request: nothing to extract, release it immediately.
            new_events += future_events
            future_events = []
            wait_extra = False
        else:
            if evt.redirect_has_extra_info:
                wait_response_extra = True
                wait_request_extra = True
                wait_extra = True

            # Synthesise the response of the redirected request; it is
            # emitted ahead of the still held-back request event.
            response_evt = cdp.network.ResponseReceived(
                request_id=evt.request_id,
                loader_id=evt.loader_id,
                timestamp=evt.timestamp,
                type_=evt.type_,
                response=evt.redirect_response,
                has_extra_info=evt.redirect_has_extra_info,
                frame_id=evt.frame_id,
            )
            new_events.append(response_evt)

    # Release whatever is still held back at the end of the stream.
    new_events += future_events

    return new_events


def parse_communications_into_actions(
    communications: list[Union[HttpCommunication, InputAction]]
) -> list[BrowserAction]:
    """Turn recorded communications into a flat list of actions.

    Entries that are not ``HttpCommunication`` instances are appended to the
    result unchanged.  ``HttpCommunication`` entries flagged ``ignored`` are
    skipped.  For every other communication its events (after redirect
    extraction) are folded into ``RequestAction`` / ``ResponseAction``
    objects; ``...ExtraInfo`` events are merged into the action they belong
    to, and each ``LoadingFinished`` event consumes the next entry of the
    communication's ``response_bodies``.

    A separator line is printed to stdout for every processed communication.

    Args:
        communications: The recorded items, in recording order.

    Returns:
        The resulting actions, in the order they were emitted.

    Raises:
        Exception: On an event sequence this parser does not expect (a
            second request-extra or response without the previous one being
            consumed, or a response body without any response to attach to).
        IndexError: If a ``LoadingFinished`` event is seen when no response
            body is left (``list.pop`` on an empty list).
    """
    from cdprecorder import logger

    actions: list[BrowserAction] = []

    for comm in communications:
        logger.debug("Comm: %s", repr(comm))

        # Non-HTTP items are passed through as they are.
        if not isinstance(comm, HttpCommunication):
            actions.append(comm)
            continue

        if comm.ignored:
            continue

        # Work on a copy so popping bodies does not alter the communication.
        response_bodies = list(comm.response_bodies)

        # Parser state: the request/response being assembled plus any
        # ExtraInfo that arrived before its main event.
        curr_request: Optional[RequestAction] = None
        request_extra: Optional[RequestAction] = None
        curr_response: Optional[ResponseAction] = None
        response_extra: Optional[ResponseAction] = None
        events = _generate_events_with_redirects_extracted(comm.events)
        print("--------------------------------------------------------")
        # Append to actions the requests/responses from each event
        for evt in events:
            if isinstance(evt, cdp.network.RequestWillBeSent):
                # A new request starts: emit the previous request (and its
                # response, if one was collected) and reset the state.
                if curr_request is not None:
                    if all((curr_request, request_extra, curr_response)):
                        curr_request.has_response = True
                        actions.append(curr_request)
                        actions.append(curr_response)
                    else:
                        actions.append(curr_request)
                        if curr_response is not None:
                            curr_request.has_response = True
                            actions.append(curr_response)

                    curr_request = None
                    request_extra = None
                    curr_response = None

                """
                if curr_request is not None:
                # Consume the previous request
                if curr_response:
                curr_request.has_response = True
                actions.append(curr_request)
                curr_request = None
                request_extra = None

                if curr_response:
                # Consume the previous response
                actions.append(curr_response)
                if response_extra:
                raise Exception
                curr_response = None
                """

                curr_request = RequestAction()
                curr_request.update_info(evt.request)
                if evt.request.has_post_data and evt.request.post_data:
                    # TODO: Check if bytes in other entry
                    curr_request.set_body(evt.request.post_data.encode())

                # Extra info that arrived ahead of the request itself.
                if request_extra is not None:
                    curr_request.merge(request_extra)

            elif isinstance(evt, cdp.network.RequestWillBeSentExtraInfo):
                # A second request-extra means the previous exchange is
                # over; emit it only when it is complete.
                if request_extra is not None:
                    if all((curr_request, request_extra, curr_response)):
                        curr_request.has_response = True
                        actions.append(curr_request)
                        curr_request = None
                        request_extra = None

                        actions.append(curr_response)
                        curr_response = None

                """
                if curr_request is not None and request_extra is not None:
                # Consume the previous request
                if curr_response:
                curr_request.has_response = True
                actions.append(curr_request)
                curr_request = None
                request_extra = None

                if curr_response:
                # Consume the previous response
                actions.append(curr_response)
                if response_extra:
                raise Exception
                curr_response = None
                """

                # Still set here: the previous exchange was incomplete.
                if request_extra is not None:
                    raise Exception
                request_extra = RequestAction()
                request_extra.update_info(evt)

                if curr_request is not None:
                    curr_request.merge(request_extra)
                    # request_extra = None

            elif isinstance(evt, cdp.network.ResponseReceived):
                # Only one response may be pending at a time.
                if curr_response is None:
                    curr_response = ResponseAction(evt.response)
                else:
                    raise Exception

                if response_extra is not None:
                    # Always merge response_extra over curr_response, not the other way
                    curr_response.merge(response_extra)
                    response_extra = None

            elif isinstance(evt, cdp.network.ResponseReceivedExtraInfo):
                if curr_response is not None:
                    # Always merge response_extra over curr_response, not the other way
                    curr_response.merge(ResponseAction(evt))
                elif response_extra is None:
                    # Extra info arrived first; keep it until the response shows up.
                    response_extra = ResponseAction(evt)
                else:
                    raise Exception

            elif isinstance(evt, cdp.network.LoadingFinished):
                # Manually inserted
                # Bodies are consumed in order, one per LoadingFinished event.
                response_body = response_bodies.pop(0)
                if response_body is not None:
                    if curr_response:
                        curr_response.set_body(response_body)
                    elif response_extra:
                        response_extra.set_body(response_body)
                    else:
                        raise Exception

                # Loading is done: emit the pending request and response.
                if curr_request is not None:
                    if curr_response or response_extra:
                        curr_request.has_response = True
                    actions.append(curr_request)
                    curr_request = None
                if curr_response is not None:
                    actions.append(curr_response)
                    curr_response = None
                elif response_extra is not None:
                    actions.append(response_extra)
                    response_extra = None

        # Emit whatever is still pending once the events are exhausted.
        # NOTE(review): ``merge`` may receive ``None`` here when no extra
        # info was seen -- confirm that ``merge`` accepts it.
        if curr_request is not None:
            if curr_response is not None:
                curr_request.has_response = True
            curr_request.merge(request_extra)
            actions.append(curr_request)
        if curr_response is not None:
            # Always merge response_extra over curr_response, not the other way
            curr_response.merge(response_extra)
            actions.append(curr_response)

    return actions


def make_action_ids_consecutive_from_list(actions: list[BrowserAction]):
    """Renumber the actions in place with consecutive IDs starting at 0.

    Each element's ``ID`` attribute is overwritten with its position in
    ``actions``.  Nothing is returned.
    """
    next_id = 0
    for item in actions:
        item.ID = next_id
        next_id += 1


async def run_recorder(options: RecorderOptions):
    """Record a session and return it as a list of numbered actions.

    Awaits ``record(options)``, converts the result with
    ``parse_communications_into_actions`` and renumbers the resulting
    actions consecutively before returning them.
    """
    recorded = await record(options)

    parsed_actions = parse_communications_into_actions(recorded)
    make_action_ids_consecutive_from_list(parsed_actions)
    return parsed_actions


def run_analyse(actions):
    """Run ``cdprecorder.analyser.analyse_actions`` on ``actions``.

    The analyser's return value, if any, is discarded.
    """
    analyser = cdprecorder.analyser
    analyser.analyse_actions(actions)


def run_replicate(actions, proxies: Optional[list[str]] = None):
    """Log the start of the replication step, then replay ``actions``.

    Args:
        actions: The actions handed on to ``run_actions``.
        proxies: Forwarded unchanged to ``run_actions``.
    """
    logger.debug("Start run_replicate")
    run_actions(actions, proxies=proxies)


async def run(options: RecorderOptions) -> None:
    """Record a session, analyse the recorded actions and replay them.

    The three stages run strictly one after another: ``run_recorder`` is
    awaited, then ``run_analyse`` and ``run_replicate`` are called on its
    result.
    """
    recorded_actions = await run_recorder(options)

    run_analyse(recorded_actions)
    run_replicate(recorded_actions)

    # Currently disabled follow-up steps, kept for reference:
    #   actions = get_only_http_actions(actions)
    #   run_actions(actions)
    #   generate_python.write_python_code(actions, "generated.py")
    #   await threads.deferToThread(chrome.kill)
Loading
Loading