Molecular simulation: Minimum Image Convention

Question

Can you review the following code to check to see if the Minimum Image Convention is properly implemented?

import random
import math
import matplotlib.pyplot as plt

polymer_chain_vec = []
N = 9
sigma = 2
epsilon = 2
periodic_boundary_int = 5  # size of the periodic box in A
temperature_float = 2  # temperature of the simulation
min_atom_distance_float = 1
max_atom_distance_float = 3.8
k = 1
sim_steps_int = 10000
write_steps_int = 10


def polymer_to_str():
    return ' '.join([f'({round(point_pt[0], 2)},{round(point_pt[1], 2)})' for point_pt in polymer_chain_vec])


def plot_energies(energies):
    plt.plot(range(len(energies)), energies)
    plt.ylabel('E')
    plt.xlabel('step')
    plt.show()


def apply_boundary_condition(point_pt):
    x, y = point_pt
    x = x % periodic_boundary_int
    y = y % periodic_boundary_int
    return [x, y]


def get_distance(point_one_pt, point_two_pt):
    dx = point_one_pt[0] - point_two_pt[0]
    dy = point_one_pt[1] - point_two_pt[1]
    dx -= periodic_boundary_int * round(dx / periodic_boundary_int)
    dy -= periodic_boundary_int * round(dy / periodic_boundary_int)
    return math.sqrt(dx**2 + dy**2)


def get_point_at_radius(current_point_pt, radius_double):
    angle = random.random() * math.pi * 2
    x = (math.cos(angle) * radius_double) + current_point_pt[0]
    y = (math.sin(angle) * radius_double) + current_point_pt[1]
    return apply_boundary_condition([x, y])


def morse_potential_func(r_float):
    return math.exp(-2 * sigma * (r_float - min_atom_distance_float)) \
           - 2 * math.exp(-sigma * (r_float - min_atom_distance_float))


def lj_energy_func(r_float):
    return 4 * epsilon * ((sigma ** 12 / r_float ** 12) - (sigma ** 6 / r_float ** 6))


def harmonic_energy_func(r_float):
    return k * ((r_float - max_atom_distance_float) ** 2)


def square_well_func(r_float):
    d = r_float
    en = 0
    if d < min_atom_distance_float:
        en += 10000000
    elif d < max_atom_distance_float:
        en += -1
    return en


def get_potential(index_int):
    bead_potential_float = 0
    harmonic_potential_float = 0
    current_loc_pt = polymer_chain_vec[index_int]
    for i, other_loc_pt in enumerate(polymer_chain_vec):
        if i != index_int:
            r_float = get_distance(current_loc_pt, other_loc_pt)
            bead_potential_float += morse_potential_func(r_float)

    if index_int >= 1:  # 1....8
        r_float = get_distance(current_loc_pt, polymer_chain_vec[index_int - 1])
        harmonic_potential_float += harmonic_energy_func(r_float)
    if index_int < len(polymer_chain_vec) - 1:  # 0....7
        r_float = get_distance(current_loc_pt, polymer_chain_vec[index_int + 1])
        harmonic_potential_float += harmonic_energy_func(r_float)

    return bead_potential_float + harmonic_potential_float


def get_total_potential():
    harmonic_float = 0
    pair_float = 0
    for i in range(len(polymer_chain_vec) - 1):
        bead_i = polymer_chain_vec[i]
        bead_i_plus_1 = polymer_chain_vec[i + 1]
        r_float = get_distance(bead_i, bead_i_plus_1)
        harmonic_float += harmonic_energy_func(r_float)

        for j in range(i + 1, len(polymer_chain_vec), 1):
            bead_j = polymer_chain_vec[j]
            r_float = get_distance(bead_i, bead_j)
            pair_float += morse_potential_func(r_float)

    return harmonic_float + pair_float


def initialize_polymer():
    current_point_pt = [0, 0]
    polymer_chain_vec.append(current_point_pt)
    for _ in range(N - 1):
        new_loc_pt = get_point_at_radius(current_point_pt, max_atom_distance_float)
        polymer_chain_vec.append(new_loc_pt)
        current_point_pt = new_loc_pt


def run_simulation(steps_int):
    for _ in range(steps_int):
        rand_index_int = random.randint(0, len(polymer_chain_vec) - 1)
        before_loc_pt = polymer_chain_vec[rand_index_int]
        before_pot_float = get_potential(rand_index_int)
        new_loc_pt = get_point_at_radius(before_loc_pt, max_atom_distance_float)
        polymer_chain_vec[rand_index_int] = new_loc_pt
        after_pot_float = get_potential(rand_index_int)
        pot_diff_float = after_pot_float - before_pot_float

        if pot_diff_float < 0:
            pass
        else:
            rand_float = random.random()
            if math.exp(-pot_diff_float / temperature_float) > rand_float:
                pass
            else:
                polymer_chain_vec[rand_index_int] = before_loc_pt


if __name__ == '__main__':
    initialize_polymer()
    total_pot_vec = []

    for ii in range(1, sim_steps_int, 1):
        run_simulation(write_steps_int)
        total_pot_double = round(get_total_potential())
        total_pot_vec.append(total_pot_double)

    plt.plot(range(len(total_pot_vec)), total_pot_vec)
    plt.show()

J_H · Accepted Answer · 2023-06-18 22:11:36Z

periodic_boundary_int = 5  # size of the periodic box in A
temperature_float = 2  # temperature of the simulation

Thank you for the first comment, that's lovely, very helpful. (Might as well spell it "Å".)

I read the second comment, and frankly I am not yet enlightened. Maybe it's °R? Maybe it's slightly above freezing point of water? I would like to assume it's kelvins, but I really want you to tell me that explicitly.

The _vec / _int / _float suffix is distracting, especially those last two. Best to elide it. We have perfectly nice ways of annotating as an aid to the Reader and mypy:

periodic_boundary: int = 5
temperature: float = 2

Definitely avoid the redundancy of e.g. point_pt.

As far as the atom distances go, I'm perfectly confident they are in ångströms, but it wouldn't hurt to group the three distances together. It's just a tiny nit.

Consider expressing the number of simulation steps as 10_000, for slightly improved readability.

That k global really frightens me. It's a short identifier, easy to confuse as a typo in e.g. i, j, k loops. I encourage you to rename it to something longer, perhaps harmonic_k. Or make it an optional keyword arg in lj_energy_func's signature.

Similar remarks for epsilon, sigma. And N is certainly a bit worrying.

The polymer chain is a mutable global list. That might motivate introducing a class which stores the chain as an object attribute.

def polymer_to_str():

I can't say I'm very fond of that signature. Rather than implicit coupling to a global, please explicitly spell out the relationship:

def polymer_to_str(polymer_chain: list) -> str:

You offered a Minimum Image link which framed it for me as being a problem about 3-space physics. But then the code seems to be about a 2-space simplification which the author introduced for didactic purposes.

It's worth at least a one-line comment to explain the constraints we choose to put on this particular implementation, to help folks understand what is currently in- or out-of-scope.

There's a bunch of obvious copy-n-paste boiler plate to treat the y dimension similar to the x dimension. There will be more of it if we expand upwards into z. Consider adopting numpy vectors, so software engineers can write about and think about quantities using familiar notation from mathematics and the sciences. Or at least write a one-line comment explaining the reason we chose to reject numpy's ndarrays.

The plot_energies() function is nice enough. Based on the Y label I assume the units are joules; it wouldn't hurt to make that explicit. Nothing calls this, so we might consider deleting it.

The plt.show() is fine, but you might find yourself deleting it later, or choosing to save to an output file. Often a paper will have small multiples of similar charts. Creating a chart and displaying a chart are separate concerns.

apply_boundary_condition() ends with

    return [x, y]

No. Please don't do that. Pythonic code would prefer to model "point" with a tuple rather than a list.

We use tuples for items with a fixed number of elements, where the index conveys the meaning. Here [0] says "x coordinate" and [1] says "y". Think of a tuple as sort of an immutable C struct. Use a dataclass instead if you need mutability.

We use lists for an arbitrary number of the "same" thing, for example:

    names: list[str] = ["Alice", "Bob"]
    points: list[tuple] = [(0, 0), (1, 1), (5, 5)]

Similarly in places like the last line of get_point_at_radius.

Kudos on starting that function with a nice tuple unpack, BTW.

Rather than get_distance(), maybe we want to call it get_lattice_distance()? Or maybe the cited reference offers some domain term for that concept? It is, on purpose, clearly different from simple Euclidean distance. A """docstring""" could spell that out.

In the get_point_at_radius() signature, radius_double is an especially unfortunate identifier, as on first reading I interpreted it as diameter.

Perhaps we should name it get_random_point_at_radius()? Or get_random_nearby_point(), and let the radius parameter speak about how "near" we mean.

I don't understand morse_potential_func(). I'm looking for the well depth D_e. Apparently we assume it is unity? Or maybe this is normalized_morse_potential()? Add a """docstring""" or maybe a # comment.

In lj_energy_func(), computing (σ ** 6 / r ** 6) seems slightly less convenient than computing (σ / r) ** 6. If there is some numeric analysis consideration going on here, please document it.

It's a perfectly good identifier, but you must spell it out in the docstring:

def lj_energy(r: float) -> float:
    """Returns the Lennard-Jones potential."""

For harmonic energy and the other two potential functions, I am concerned that caller might accidentally violate assumptions and then rely on an incorrect computed value.

For example if you expect a non-negative radius, spell that out explicitly, either with

    assert r >= 0, r

or

    if r < 0:
        raise ValueError(f"got a negative radius: {r}")

I am especially concerned that you might expect the radius shall not exceed the periodic boundary. So you might want two checks.

In square_well_func you kind of lost me.

        en += 10000000

As a kindness to the Reader, let's help with counting those zeros:

        en += 10_000_000

Ok, on to the substantive item. What does that even mean? 10 MJ? Where did it come from? This is a classic magic number. You need to name it, and it would be really helpful to cite some reference both for this and for the -1 quantity. Simplest way to name it is to stick it in the signature:

def square_well(r: float, mystery_energy=10_000_000) -> float:

I was expecting to see some mention of Planck's constant, and the particle mass, but I didn't notice them.

In get_potential(), this appears to be the Wrong comment:

    if index_int >= 1:  # 1....8

I mean, sure, it's probably numerically true ATM. But you wanted to phrase that in terms of N, I believe. (Plus, we should find a better name, perhaps N_ATOMS.)

It appears to me that get_potential() is where the action is. You've been building up primitives and here you combine them all together. So it is absolutely criminal that you chose to not write at least a one-sentence """docstring""" for this function. Tell me what I get, and how to interpret it. Cite the occasional reference. What assumptions have we made here?

In get_total_potential() we see

        bead_i_plus_1 = polymer_chain_vec[i + 1]

Arrgh! Don't do that.

We don't write comments like

        i += 1  # Increments the integer index so it is exactly 1 bigger.

and we don't invent identifiers like bead_i_plus_1. Call it next_bead and leave it at that. It's a local variable, so the Reader can easily locate the assignment to find the details.

Later we see

            pair_float += morse_potential_func(r_float)

which is just wrong. It's not a pair. Call it pair_potential or pair_energy or even just energy since it's summed across many pairs. Say what you mean and mean what you say.

Looking at def run_simulation(), it appears we could move the temperature global into the signature as a keyword default arg. It would be less trouble than turning all these functions into class methods.

The if __name__ guard is very nice. There's just eight lines of code, so it's not too long yet. But still, it would be worthwhile to push those eight lines into a def main(): function, so that some variables could become local variables, which go out of scope when the function exits.

Plus it definitely needs to be longer than eight lines. What result am I looking at? Give the chart a title. What do the X and Y axes depict? Give them labels, which include either SI units or "ångström" as applicable).

I inspect particle positions in this way:

    import numpy as np
    import pandas as pd
    import seaborn as sns

    pos = np.array(polymer_chain_vec)
    df = pd.DataFrame(pos, columns=['x', 'y'])
    sns.scatterplot(x=df.x, y=df.y)
    plt.show()

I guess the potentials are influencing where the particles wind up? But it's hard to see, looks pretty random. I would like to see some verifiable assertions made about the resulting polymer, maybe claims about what the distribution of nearest neighbor distances should look like.

Consider adopting optional type annotation, and linting with mypy.

This code appears to achieve its design goals. Certainly it computes a result without crashing.

It_word is_word hard_word to_word read_word.

Some rename refactors could quickly remedy that. I would not be willing to maintain this code in its current form.

QuasiStellar · Accepted Answer · 2023-06-18 21:41:19Z

bead_potential_float
harmonic_potential_float
current_loc_pt

Don't include types in variable names, use type hints instead. Those extra symbols pollute the code while not making it any more readable.

def get_distance(point_one_pt, point_two_pt):
    dx = point_one_pt[0] - point_two_pt[0]
    dy = point_one_pt[1] - point_two_pt[1]
    dx -= periodic_boundary_int * round(dx / periodic_boundary_int)
    dy -= periodic_boundary_int * round(dy / periodic_boundary_int)
    return math.sqrt(dx**2 + dy**2)

math.hypot function should be used for this.

def square_well_func(r_float):
    d = r_float
    en = 0
    if d < min_atom_distance_float:
        en += 10000000
    elif d < max_atom_distance_float:
        en += -1
    return en

It's hard to understand what is going on here, since the names don't indicate purposes of objects they are referring to. Anyway, assigning d to r_float and using += is redundant:

def square_well_func(r):
    if r < min_atom_distance:
        return 10000000
    elif r < max_atom_distance:
        return -1
    else:
        return 0

if pot_diff_float < 0:
    pass
else:
    rand_float = random.random()
    if math.exp(-pot_diff_float / temperature_float) > rand_float:
        pass
    else:
        polymer_chain_vec[rand_index_int] = before_loc_pt

Get rid of extra indentation by inverting an if-statement and replacing pass with continue:

if pot_diff_float < 0:
    continue

rand_float = random.random()
if math.exp(-pot_diff_float / temperature_float) <= rand_float:
    polymer_chain_vec[rand_index_int] = before_loc_pt

Stack Exchange Network

Molecular simulation: Minimum Image Convention

2 Answers 2

Not the answer you're looking for? Browse other questions tagged
python
simulation
physics
or ask your own question.

Hot Network Questions

Molecular simulation: Minimum Image Convention

2 Answers 2

Not the answer you're looking for? Browse other questions tagged pythonsimulationphysics or ask your own question.

Related

Hot Network Questions

Not the answer you're looking for? Browse other questions tagged
python
simulation
physics
or ask your own question.